32 #ifndef VSMC_OPENCL_CL_MANAGER_HPP
33 #define VSMC_OPENCL_CL_MANAGER_HPP
/// \brief Runtime assertion for members that need a completed setup.
///
/// The asserted condition is the value of `setup()` at the expansion site;
/// `func` is stringized into the message to name the calling member.
#define VSMC_RUNTIME_ASSERT_OPENCL_CL_MANAGER_SETUP(func)                    \
    VSMC_RUNTIME_ASSERT(                                                     \
        (setup()),                                                           \
        ("**CLManager::"#func"** CAN ONLY BE CALLED AFTER TRUE "             \
         "**CLManager::setup**"));
/// \brief Runtime warning for the platform step of `CLManager::setup`.
///
/// Uses the identifier `setup_platform` as the warning condition, so a
/// variable of that name must be in scope where the macro is expanded.
#define VSMC_RUNTIME_WARNING_OPENCL_CL_MANAGER_SETUP_PLATFORM                \
    VSMC_RUNTIME_WARNING(                                                    \
        setup_platform,                                                      \
        ("**CLManager::setup** FAILED TO SETUP A PLATFORM"));
/// \brief Runtime warning for the context step of `CLManager::setup`.
///
/// Uses the identifier `setup_context` as the warning condition, so a
/// variable of that name must be in scope where the macro is expanded.
#define VSMC_RUNTIME_WARNING_OPENCL_CL_MANAGER_SETUP_CONTEXT                 \
    VSMC_RUNTIME_WARNING(                                                    \
        setup_context,                                                       \
        ("**CLManager::setup** FAILED TO SETUP A CONTEXT"));
/// \brief Runtime warning for the device step of `CLManager::setup`.
///
/// Uses the identifier `setup_device` as the warning condition, so a
/// variable of that name must be in scope where the macro is expanded.
#define VSMC_RUNTIME_WARNING_OPENCL_CL_MANAGER_SETUP_DEVICE                  \
    VSMC_RUNTIME_WARNING(                                                    \
        setup_device,                                                        \
        ("**CLManager::setup** FAILED TO SETUP A DEVICE"));
/// \brief Runtime warning for the command queue step of `CLManager::setup`.
///
/// Uses the identifier `setup_command_queue` as the warning condition, so a
/// variable of that name must be in scope where the macro is expanded.
#define VSMC_RUNTIME_WARNING_OPENCL_CL_MANAGER_SETUP_COMMAND_QUEUE           \
    VSMC_RUNTIME_WARNING(                                                    \
        setup_command_queue,                                                 \
        ("**CLManager::setup** FAILED TO SETUP A COMMAND_QUEUE"));
/// \brief Runtime warning for a non-blocking call made without an event.
///
/// The warning condition holds when the call is blocking or when `event` is
/// not null.
///
/// \param func  Name of the calling member; stringized into the message.
/// \param block Expression saying whether the call blocks.
/// \param event Pointer expression compared against `VSMC_NULLPTR`.
///
/// Both `block` and `event` are parenthesized so that argument expressions
/// containing lower-precedence operators keep their meaning, and the function
/// name in the message is closed with `**` like the other CLManager messages.
#define VSMC_RUNTIME_WARNING_OPENCL_CL_MANAGER_BLOCK(func, block, event)     \
    VSMC_RUNTIME_WARNING(                                                    \
        ((block) || (event) != VSMC_NULLPTR),                                \
        ("**CLManager::"#func"** NOT BLOCKING BUT WITH NULL EVENT"))
128 template <
typename ID = CLDefault>
156 const ::cl::Platform &
platform ()
const {
return platform_;}
159 const ::cl::Context &
context ()
const {
return context_;}
162 const ::cl::Device &
device ()
const {
return device_;}
165 const std::vector< ::cl::Device> &
device_vec ()
const {
return device_vec_;}
172 bool setup ()
const {
return setup_;}
179 setup_cl_manager(dev);
189 bool setup (const ::cl::Platform &plat, const ::cl::Context &ctx,
190 const ::cl::Device &dev, const ::cl::CommandQueue &cmd)
196 device_vec_ = context_.getInfo<CL_CONTEXT_DEVICES>();
197 command_queue_ = cmd;
198 check_opencl_version();
205 template <
typename CLType>
213 return ::cl::Buffer();
215 return ::cl::Buffer(context_, flag,
sizeof(CLType) * num, host_ptr);
220 template <
typename CLType,
typename OutputIter>
222 OutputIter first, std::size_t offset = 0,
224 ::cl::Event *event =
VSMC_NULLPTR,
bool block =
true)
const
230 std::vector<CLType> buffer(num);
231 command_queue_.enqueueReadBuffer(buf, static_cast< ::cl_bool>(block),
232 sizeof(CLType) * offset,
sizeof(CLType) * num,
233 &buffer[0], events, event);
234 std::copy(buffer.begin(), buffer.end(), first);
239 template <
typename CLType>
241 CLType *first, std::size_t offset = 0,
243 ::cl::Event *event =
VSMC_NULLPTR,
bool block =
true)
const
249 command_queue_.enqueueReadBuffer(buf, static_cast< ::cl_bool>(block),
250 sizeof(CLType) * offset,
sizeof(CLType) * num,
251 first, events, event);
256 template <
typename CLType,
typename InputIter>
258 InputIter first, std::size_t offset = 0,
260 ::cl::Event *event =
VSMC_NULLPTR,
bool block =
true)
const
266 std::vector<CLType> buffer(num);
267 #if VSMC_HAS_CXX11LIB_ALGORITHM
268 std::copy_n(first, num, &buffer[0]);
270 for (std::size_t i = 0; i != num; ++i, ++first)
273 command_queue_.enqueueWriteBuffer(buf, static_cast< ::cl_bool>(block),
274 sizeof(CLType) * offset,
sizeof(CLType) * num,
275 &buffer[0], events, event);
280 template <
typename CLType>
282 const CLType *first, std::size_t offset = 0,
284 ::cl::Event *event =
VSMC_NULLPTR,
bool block =
true)
const
290 command_queue_.enqueueWriteBuffer(buf, static_cast< ::cl_bool>(block),
291 sizeof(CLType) * offset,
sizeof(CLType) * num,
292 const_cast<CLType *>(first), events, event);
297 template <
typename CLType>
299 CLType *first, std::size_t offset = 0,
301 ::cl::Event *event =
VSMC_NULLPTR,
bool block =
true)
const
307 command_queue_.enqueueWriteBuffer(buf, static_cast< ::cl_bool>(block),
308 sizeof(CLType) * offset,
sizeof(CLType) * num,
309 first, events, event);
314 template <
typename CLType>
315 void copy_buffer (const ::cl::Buffer &src, const ::cl::Buffer &dst,
317 std::size_t src_offset = 0, std::size_t dst_offset = 0,
319 ::cl::Event *event =
VSMC_NULLPTR,
bool block =
true)
const
327 command_queue_.enqueueCopyBuffer(src, dst,
328 sizeof(CLType) * src_offset,
sizeof(CLType) * dst_offset,
329 sizeof(CLType) * num, events, eptr);
349 std::size_t local_size = 0,
351 ::cl::Event *event =
VSMC_NULLPTR,
bool block =
true)
const
357 command_queue_.enqueueNDRangeKernel(kern, ::cl::NullRange,
358 get_global_nd_range(N, local_size),
359 get_local_nd_range(local_size), events, eptr);
385 template <
typename Func>
390 const Func &func, std::size_t lmin = 0, std::size_t repeat = 10)
392 cl::size_t<3> reqd_size;
394 kern.getWorkGroupInfo(device_,
395 CL_KERNEL_COMPILE_WORK_GROUP_SIZE, &reqd_size);
396 }
catch (const ::cl::Error &) {
400 if (reqd_size[0] != 0)
410 if (lmin != 0 && lmin <= lmax) {
412 mmax = lmax / factor;
413 lmax = mmax * factor;
419 double time = std::numeric_limits<double>::max
VSMC_MNE ();
420 std::size_t lsize = lmax;
423 for (std::size_t m = mmax; m >= 1; --m) {
424 std::size_t l = m * factor;
429 for (std::size_t r = 0; r != repeat; ++r) {
444 std::size_t lmin = 0, std::size_t repeat = 3)
445 {
return profile_kernel(kern, N, profile_kernel_func_(), lmin, repeat);}
449 {return ::cl::Program(context_, source);}
455 std::vector<std::pair<const char *, std::size_t> > src(source.size());
456 for (std::size_t i = 0; i != source.size(); ++i)
457 src[i] = std::make_pair(source[i].data(), source[i].size());
459 return ::cl::Program(context_, src);
472 const std::vector< ::cl::Device> *devices,
475 std::vector<std::pair<const void *, std::size_t> > bin(binary.size());
476 for (std::size_t i = 0; i != binary.size(); ++i) {
477 bin[i] = std::make_pair(
478 static_cast<const void *>(binary[i].data()),
483 ::cl::Program(context_, device_vec_, bin, status):
484 ::cl::Program(context_, *devices, bin, status);
489 struct profile_kernel_func_ {
void operator() (::cl::Kernel &)
const {}};
491 ::cl::Platform platform_;
492 ::cl::Context context_;
493 ::cl::Device device_;
494 std::vector< ::cl::Device> device_vec_;
495 ::cl::CommandQueue command_queue_;
498 CLSetup<ID> &setup_default_;
500 int opencl_c_version_;
502 CLManager () : setup_(false), setup_default_(CLSetup<ID>::
instance())
503 {setup_cl_manager(setup_default_.device_type());}
505 CLManager (
const CLManager<ID> &);
507 CLManager<ID> &operator= (
const CLManager<ID> &);
509 void check_opencl_version ()
513 for (std::size_t i = 0; i != device_vec_.size(); ++i) {
516 if (opencl_version_ > ocl)
517 opencl_version_ = ocl;
518 if (opencl_c_version_ > ocl)
519 opencl_c_version_ = oclc;
523 void setup_cl_manager (::cl_device_type dev_type)
527 bool setup_platform = platform_filter(dev_type);
529 if (!setup_platform)
return;
531 bool setup_context =
false;
532 bool setup_device =
false;
534 std::vector< ::cl::Device> dev_pool;
535 std::vector< ::cl::Device> dev_select;
536 platform_.getDevices(dev_type, &dev_pool);
537 device_filter(dev_pool, dev_select);
538 if (dev_select.size() != 0) {
539 ::cl_context_properties context_properties[] = {
541 reinterpret_cast< ::cl_context_properties
>(platform_()), 0
543 context_ = ::cl::Context(dev_select, context_properties);
544 setup_context =
true;
545 device_vec_ = context_.getInfo<CL_CONTEXT_DEVICES>();
546 device_ = device_vec_[0];
549 }
catch (const ::cl::Error &) {}
552 if (!setup_context)
return;
553 if (!setup_device)
return;
555 bool setup_command_queue =
false;
557 command_queue_ = ::cl::CommandQueue(context_, device_, 0);
558 setup_command_queue =
true;
559 }
catch (const ::cl::Error &) {}
561 if (!setup_command_queue)
return;
563 check_opencl_version();
568 bool platform_filter (::cl_device_type dev_type)
570 std::vector< ::cl::Platform> platform_vec;
573 }
catch (const ::cl::Error &) {
574 platform_vec.clear();
576 if (platform_vec.size() == 0)
580 if (!setup_default_.default_platform()) {
581 for (std::size_t p = 0; p != platform_vec.size(); ++p) {
584 platform_vec[p].getInfo(CL_PLATFORM_NAME, &name);
585 if (setup_default_.check_platform(name)) {
586 platform_ = platform_vec[p];
589 }
catch (const ::cl::Error &) {}
596 for (std::size_t p = 0; p != platform_vec.size(); ++p) {
598 std::vector< ::cl::Device> dev_pool;
599 std::vector< ::cl::Device> dev_select;
600 platform_vec[p].getDevices(dev_type, &dev_pool);
601 device_filter(dev_pool, dev_select);
602 if (dev_select.size() != 0) {
603 platform_ = platform_vec[p];
606 }
catch (const ::cl::Error &) {}
612 void device_filter (
const std::vector< ::cl::Device> &dev_pool,
613 std::vector< ::cl::Device> &dev_select)
615 std::vector<bool> dev_select_idx(dev_pool.size(),
true);
618 if (!setup_default_.default_device_vendor()) {
619 for (std::size_t d = 0; d != dev_pool.size(); ++d) {
622 dev_pool[d].getInfo(CL_DEVICE_VENDOR, &str);
623 if (!setup_default_.check_device_vendor(str))
624 dev_select_idx[d] =
false;
625 }
catch (const ::cl::Error &) {
626 dev_select_idx[d] =
false;
632 if (!setup_default_.default_device()) {
633 for (std::size_t d = 0; d != dev_pool.size(); ++d) {
636 dev_pool[d].getInfo(CL_DEVICE_NAME, &str);
637 if (!setup_default_.check_device(str))
638 dev_select_idx[d] =
false;
639 }
catch (const ::cl::Error &) {
640 dev_select_idx[d] =
false;
645 for (std::size_t d = 0; d != dev_pool.size(); ++d) {
646 if (dev_select_idx[d]) {
648 dev_select.push_back(dev_pool[d]);
649 }
catch (const ::cl::Error &) {}
654 ::cl::NDRange get_global_nd_range (
655 std::size_t N, std::size_t local_size)
const
658 return ::cl::NDRange(N);
660 if (N % local_size == 0)
661 return ::cl::NDRange(N);
663 return ::cl::NDRange((N / local_size + 1) * local_size);
666 ::cl::NDRange get_local_nd_range (std::size_t local_size)
const
667 {
return local_size == 0 ? ::cl::NullRange : ::cl::NDRange(local_size);}
672 #endif // VSMC_OPENCL_CL_MANAGER_HPP
::cl::Program create_program(const std::vector< std::string > &source) const
Create a program given a vector of sources within the current context.
const ::cl::Context & context() const
The context currently being used.
void read_buffer(const ::cl::Buffer &buf, std::size_t num, CLType *first, std::size_t offset=0, const std::vector< ::cl::Event > *events=nullptr,::cl::Event *event=nullptr, bool block=true) const
Read an OpenCL buffer of a given type and number of elements into a pointer.
void read_buffer(const ::cl::Buffer &buf, std::size_t num, OutputIter first, std::size_t offset=0, const std::vector< ::cl::Event > *events=nullptr,::cl::Event *event=nullptr, bool block=true) const
Read an OpenCL buffer of a given type and number of elements into an iterator.
bool setup(::cl_device_type dev)
Try to set up the platform, context, device and command queue using the given device type...
void write_buffer(const ::cl::Buffer &buf, std::size_t num, CLType *first, std::size_t offset=0, const std::vector< ::cl::Event > *events=nullptr,::cl::Event *event=nullptr, bool block=true) const
Write an OpenCL buffer of a given type and number of elements from a pointer.
#define VSMC_RUNTIME_WARNING_OPENCL_CL_MANAGER_BLOCK(func, block, event)
void copy_buffer(const ::cl::Buffer &src, const ::cl::Buffer &dst, std::size_t num, std::size_t src_offset=0, std::size_t dst_offset=0, const std::vector< ::cl::Event > *events=nullptr,::cl::Event *event=nullptr, bool block=true) const
Copy an OpenCL buffer into another of a given type and number of elements.
void write_buffer(const ::cl::Buffer &buf, std::size_t num, InputIter first, std::size_t offset=0, const std::vector< ::cl::Event > *events=nullptr,::cl::Event *event=nullptr, bool block=true) const
Write an OpenCL buffer of a given type and number of elements from an iterator.
const ::cl::CommandQueue & command_queue() const
The command queue currently being used.
int opencl_c_version() const
The minimum OpenCL C version supported by all devices in the context of this manager.
void reset()
Stop and reset the elapsed time to zero.
T & get(Array< T, N > &ary)
Array ADL of get.
void write_buffer(const ::cl::Buffer &buf, std::size_t num, const CLType *first, std::size_t offset=0, const std::vector< ::cl::Event > *events=nullptr,::cl::Event *event=nullptr, bool block=true) const
Write an OpenCL buffer of a given type and number of elements from a pointer.
#define VSMC_RUNTIME_WARNING_OPENCL_CL_MANAGER_SETUP_PLATFORM
void cl_minmax_local_size(const ::cl::Kernel &kern, const ::cl::Device &dev, std::size_t &factor, std::size_t &lmax, std::size_t &mmax)
Query the preferred factor of local size.
static int opencl_c_version(const ::cl::Device &dev)
Return the OpenCL C version of a device.
::cl::Buffer create_buffer(std::size_t num,::cl_mem_flags flag=CL_MEM_READ_WRITE, void *host_ptr=nullptr) const
Create an OpenCL buffer of a given type and number of elements.
std::size_t profile_kernel(::cl::Kernel &kern, std::size_t N, std::size_t lmin=0, std::size_t repeat=3)
#define VSMC_MNE
Avoid MSVC stupid behavior: MNE = Macro No Expansion.
StopWatch as an adapter of C++11 clock.
const std::vector< ::cl::Device > & device_vec() const
The vector of all devices that are in the context of this manager.
void run_kernel(const ::cl::Kernel &kern, std::size_t N, std::size_t local_size=0, const std::vector< ::cl::Event > *events=nullptr,::cl::Event *event=nullptr, bool block=true) const
Run a given kernel with one dimensional global size and local size on the current command queue...
bool stop()
Stop the watch, no effect if already stopped.
#define VSMC_RUNTIME_WARNING_OPENCL_CL_MANAGER_SETUP_COMMAND_QUEUE
#define VSMC_NULLPTR
nullptr
double milliseconds() const
Return the accumulated elapsed time in milliseconds.
bool setup(const ::cl::Platform &plat, const ::cl::Context &ctx, const ::cl::Device &dev, const ::cl::CommandQueue &cmd)
Set the platform, context, device and command queue manually.
int opencl_version() const
The minimum OpenCL version supported by all devices in the context of this manager.
#define VSMC_RUNTIME_WARNING_OPENCL_CL_MANAGER_SETUP_CONTEXT
const ::cl::Platform & platform() const
The platform currently being used.
cxx11::enable_if< !cxx11::is_same< Func, std::size_t >::value &&!cxx11::is_convertible< Func, std::size_t >::value, std::size_t >::type profile_kernel(::cl::Kernel &kern, std::size_t N, const Func &func, std::size_t lmin=0, std::size_t repeat=10)
Run the kernel with all local sizes that are multiples of the preferred factor, and return the local size ...
#define VSMC_RUNTIME_ASSERT_OPENCL_CL_MANAGER_SETUP(func)
static int opencl_version(const ::cl::Device &dev)
Return the OpenCL version of a device.
bool start()
Start the watch, no effect if already started.
::cl::Program create_program(const std::string &source) const
Create a program given the source within the current context.
static CLManager< ID > & instance()
Get an instance of the manager singleton.
#define VSMC_RUNTIME_WARNING_OPENCL_CL_MANAGER_SETUP_DEVICE
::cl::Program create_program(const std::vector< std::string > &binary, const std::vector< ::cl::Device > *devices, std::vector< ::cl_int > *status=nullptr) const
Create a program given binaries within the current context.
bool setup() const
Whether the platform, context, device and command queue have been set up correctly. ...
const ::cl::Device & device() const
The device currently being used.