vSMC  v3.0.0
Scalable Monte Carlo
opencl.hpp
Go to the documentation of this file.
1 //============================================================================
2 // vSMC/include/vsmc/utility/opencl.hpp
3 //----------------------------------------------------------------------------
4 // vSMC: Scalable Monte Carlo
5 //----------------------------------------------------------------------------
6 // Copyright (c) 2013-2016, Yan Zhou
7 // All rights reserved.
8 //
9 // Redistribution and use in source and binary forms, with or without
10 // modification, are permitted provided that the following conditions are met:
11 //
12 // Redistributions of source code must retain the above copyright notice,
13 // this list of conditions and the following disclaimer.
14 //
15 // Redistributions in binary form must reproduce the above copyright notice,
16 // this list of conditions and the following disclaimer in the documentation
17 // and/or other materials provided with the distribution.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 // POSSIBILITY OF SUCH DAMAGE.
30 //============================================================================
31 
32 #ifndef VSMC_UTILITY_OPENCL_HPP
33 #define VSMC_UTILITY_OPENCL_HPP
34 
35 #include <vsmc/internal/common.hpp>
36 
37 #ifdef __APPLE__
38 #include <OpenCL/opencl.h>
39 #else
40 #include <CL/opencl.h>
41 #endif
42 
43 #ifndef CL_VERSION_1_2
44 #error OpenCL 1.2 support required
45 #endif
46 
47 #ifdef VSMC_CLANG
48 #pragma clang diagnostic push
49 #pragma clang diagnostic ignored "-Wdeprecated-declarations"
50 #endif
51 
52 #ifdef VSMC_GCC
53 #pragma GCC diagnostic push
54 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
55 #endif
56 
57 #ifdef VSMC_INTEL
58 #pragma warning(push)
59 #pragma warning(disable : 1478)
60 #endif
61 
62 #define VSMC_DEFINE_UTILITY_OPENCL_GET_INFO(Class, type, name, Name) \
63  ::cl_int name##_info(::cl_##type##_info param_name, \
64  std::size_t param_value_size, void *param_value, \
65  std::size_t *param_value_size_ret) const \
66  { \
67  return internal::cl_error_check( \
68  ::clGet##Name##Info(get(), param_name, param_value_size, \
69  param_value, param_value_size_ret), \
70  "CL" #Class "::" #name "_info", "::clGet" #Name "Info"); \
71  } \
72  \
73  template <typename ParamType> \
74  ::cl_int name##_info( \
75  ::cl_##type##_info param_name, ParamType &param_value) const \
76  { \
77  ParamType v; \
78  ::cl_int status = \
79  name##_info(param_name, sizeof(ParamType), &v, nullptr); \
80  if (status == CL_SUCCESS) \
81  param_value = v; \
82  \
83  return status; \
84  } \
85  \
86  template <typename ParamType> \
87  ::cl_int name##_info(::cl_##type##_info param_name, \
88  std::vector<ParamType> &param_value) const \
89  { \
90  ::cl_int status = CL_SUCCESS; \
91  \
92  std::size_t param_value_size = 0; \
93  status = name##_info(param_name, 0, nullptr, &param_value_size); \
94  if (status != CL_SUCCESS) \
95  return status; \
96  \
97  std::vector<ParamType> v(param_value_size / sizeof(ParamType)); \
98  status = \
99  name##_info(param_name, param_value_size, v.data(), nullptr); \
100  if (status == CL_SUCCESS) \
101  param_value = std::move(v); \
102  \
103  return status; \
104  } \
105  \
106  ::cl_int name##_info( \
107  ::cl_##type##_info param_name, std::string &param_value) const \
108  { \
109  std::vector<char> v; \
110  ::cl_int status = name##_info(param_name, v); \
111  v.push_back('\0'); \
112  if (status == CL_SUCCESS) \
113  param_value = static_cast<const char *>(v.data()); \
114  \
115  return status; \
116  }
117 
118 namespace vsmc
119 {
120 
121 namespace internal
122 {
123 
124 #if VSMC_NO_RUNTIME_ASSERT
125 inline ::cl_int cl_error_check(::cl_int status, const char *, const char *)
126 {
127  return status;
128 }
129 #else // VSMC_NO_RUNTIME_ASSERT
130 inline ::cl_int cl_error_check(::cl_int status, const char *cpp, const char *c)
131 {
132  if (status == CL_SUCCESS)
133  return status;
134 
135  std::string msg;
136  msg += "**";
137  msg += cpp;
138  msg += "**";
139  msg += " failed";
140  msg += "; OpenCL function: ";
141  msg += c;
142  msg += "; Error code: ";
143  msg += std::to_string(status);
144 
145  VSMC_RUNTIME_ASSERT((status == CL_SUCCESS), msg.c_str());
146 
147  return status;
148 }
149 #endif // VSMC_NO_RUNTIME_ASSERT
150 
151 template <typename CLType>
152 inline std::vector<typename CLType::pointer> cl_vec_cpp2c(
153  ::cl_uint n, const CLType *ptr)
154 {
155  std::vector<typename CLType::pointer> vec;
156  for (::cl_uint i = 0; i != n; ++i)
157  vec.push_back(ptr[i].get());
158 
159  return vec;
160 }
161 
162 template <typename CLType>
163 inline std::vector<CLType> cl_vec_c2cpp(
164  ::cl_uint n, const typename CLType::pointer *ptr)
165 {
166  std::vector<CLType> vec;
167  for (::cl_uint i = 0; i != n; ++i)
168  vec.push_back(CLType(ptr[i]));
169 
170  return vec;
171 }
172 
173 } // namespace vsmc::internal
174 
175 class CLNDRange;
176 class CLDevice;
177 class CLPlatform;
178 class CLContextProperties;
179 class CLContext;
180 class CLEvent;
181 class CLMemory;
182 class CLProgram;
183 class CLKernel;
184 class CLCommandQueue;
185 
188 template <typename CLPtr, typename Derived>
189 class CLBase
190 {
191  public:
192  using pointer = CLPtr;
193  using element_type = typename std::remove_pointer<CLPtr>::type;
194 
195  CLBase() : ptr_(nullptr, [](pointer p) { Derived::release(p); }) {}
196 
197  void reset(pointer ptr)
198  {
199  if (ptr != ptr_.get())
200  ptr_.reset(ptr, [](pointer p) { Derived::release(p); });
201  }
202 
203  void swap(CLBase<CLPtr, Derived> &other) { ptr_.swap(other.ptr_); }
204 
205  pointer get() const { return ptr_.get(); }
206 
207  long use_count() const { return ptr_.use_count(); }
208 
209  bool unique() const { return ptr_.unique(); }
210 
211  explicit operator bool() const { return bool(ptr_); }
212 
213  protected:
214  void reset_ptr(pointer ptr) { reset(ptr); }
215 
216  private:
217  std::shared_ptr<element_type> ptr_;
218 }; // class CLBase
219 
222 template <typename CLPtr, typename Derived>
223 inline bool operator==(
224  const CLBase<CLPtr, Derived> &ptr1, const CLBase<CLPtr, Derived> &ptr2)
225 {
226  return ptr1.get() == ptr2.get();
227 }
228 
231 template <typename CLPtr, typename Derived>
232 inline bool operator!=(
233  const CLBase<CLPtr, Derived> &ptr1, const CLBase<CLPtr, Derived> &ptr2)
234 {
235  return ptr1.get() != ptr2.get();
236 }
237 
240 template <typename CLPtr, typename Derived>
241 inline void swap(
242  const CLBase<CLPtr, Derived> &ptr1, const CLBase<CLPtr, Derived> &ptr2)
243 {
244  ptr1.swap(ptr2);
245 }
246 
250 {
251  public:
252  CLNDRange() : dim_(0), range_({0, 0, 0}) {}
253 
254  explicit CLNDRange(std::size_t x) : dim_(1), range_({x, 0, 0}) {}
255 
256  CLNDRange(std::size_t x, std::size_t y) : dim_(2), range_({x, y, 0}) {}
257 
258  CLNDRange(std::size_t x, std::size_t y, std::size_t z)
259  : dim_(3), range_({x, y, z})
260  {
261  }
262 
263  std::size_t dim() const { return dim_; }
264 
265  const std::size_t *data() const
266  {
267  return dim_ == 0 ? nullptr : range_.data();
268  }
269 
270  private:
271  const std::size_t dim_;
272  const std::array<std::size_t, 3> range_;
273 }; // class CLNDRange
274 
277 class CLDevice : public CLBase<::cl_device_id, CLDevice>
278 {
279  public:
280  explicit CLDevice(::cl_device_id ptr = nullptr) { reset_ptr(ptr); }
281 
283  std::vector<CLDevice> creat_sub_devices(
284  const ::cl_device_partition_property *properties) const
285  {
286  ::cl_uint n = 0;
288  ::clCreateSubDevices(get(), properties, 0, nullptr, &n),
289  "CLDevice::sub_devices",
290  "::clCreateSubDevices") != CL_SUCCESS) {
291  return std::vector<CLDevice>();
292  }
293 
294  std::vector<::cl_device_id> vec(n);
295  if (internal::cl_error_check(::clCreateSubDevices(get(), properties, n,
296  vec.data(), nullptr),
297  "CLDevice::sub_devices",
298  "::clCreateSubDevices") != CL_SUCCESS) {
299  return std::vector<CLDevice>();
300  }
301 
302  return internal::cl_vec_c2cpp<CLDevice>(n, vec.data());
303  }
304 
306  VSMC_DEFINE_UTILITY_OPENCL_GET_INFO(Device, device, get, Device)
307 
308 
309  static ::cl_int release(::cl_device_id ptr)
310  {
311  if (ptr == nullptr)
312  return CL_SUCCESS;
313 
315  ::clReleaseDevice(ptr), "CLDevice::release", "::clReleaseDevice");
316  }
317 }; // class CLDevice
318 
321 class CLPlatform : public CLBase<::cl_platform_id, CLPlatform>
322 {
323  public:
324  explicit CLPlatform(::cl_platform_id ptr = nullptr) { reset_ptr(ptr); }
325 
327  std::vector<CLDevice> get_device(::cl_device_type device_type) const
328  {
329  ::cl_uint n = 0;
331  ::clGetDeviceIDs(get(), device_type, 0, nullptr, &n),
332  "CLPlatform::get_device", "::clGetDeviceIDs") != CL_SUCCESS) {
333  return std::vector<CLDevice>();
334  }
335 
336  std::vector<::cl_device_id> vec(n);
338  ::clGetDeviceIDs(get(), device_type, n, vec.data(), nullptr),
339  "CLPlatform::get_device", "::clGetDeviceIDs") != CL_SUCCESS) {
340  return std::vector<CLDevice>();
341  }
342 
343  return internal::cl_vec_c2cpp<CLDevice>(n, vec.data());
344  }
345 
347  ::cl_int unload_compiler() const
348  {
349  return internal::cl_error_check(::clUnloadPlatformCompiler(get()),
350  "CLPlatform::unload_compiler", "::clUnloadPlatformCompiler");
351  }
352 
354  VSMC_DEFINE_UTILITY_OPENCL_GET_INFO(Platform, platform, get, Platform)
355 
356  static ::cl_int release(::cl_platform_id) { return CL_SUCCESS; }
357 }; // class CLPlatform
358 
361 inline std::vector<CLPlatform> cl_get_platform()
362 {
363  ::cl_uint n = 0;
364  if (internal::cl_error_check(::clGetPlatformIDs(0, nullptr, &n),
365  "CLPlatform::get_platform", "::clGetPlatformIDs") != CL_SUCCESS) {
366  return std::vector<CLPlatform>();
367  }
368 
369  std::vector<::cl_platform_id> vec(n);
370  if (internal::cl_error_check(::clGetPlatformIDs(n, vec.data(), nullptr),
371  "CLPlatform::get_platform", "::clGetPlatformIDs") != CL_SUCCESS) {
372  return std::vector<CLPlatform>();
373  }
374 
375  return internal::cl_vec_c2cpp<CLPlatform>(n, vec.data());
376 }
377 
381 {
382  public:
383  explicit CLContextProperties(const CLPlatform &platform)
384  : properties_({CL_CONTEXT_PLATFORM,
385  reinterpret_cast<::cl_context_properties>(platform.get()), 0, 0,
386  0})
387  {
388  }
389 
391  const CLPlatform &platform, ::cl_bool interop_user_sync)
392  : properties_({CL_CONTEXT_PLATFORM,
393  reinterpret_cast<::cl_context_properties>(platform.get()),
394  CL_CONTEXT_INTEROP_USER_SYNC, interop_user_sync, 0})
395  {
396  }
397 
398  const ::cl_context_properties *data() const { return properties_.data(); }
399 
400  private:
401  const std::array<::cl_context_properties, 5> properties_;
402 }; // class CLContextProperty
403 
406 class CLContext : public CLBase<::cl_context, CLContext>
407 {
408  public:
409  using pfn_notify_type = void(CL_CALLBACK *)(
410  const char *, const void *, std::size_t, void *);
411 
412  explicit CLContext(::cl_context ptr = nullptr) { reset_ptr(ptr); }
413 
415  CLContext(const CLContextProperties &properties, ::cl_uint num_devices,
416  const CLDevice *devices, pfn_notify_type pfn_notify = nullptr,
417  void *user_data = nullptr)
418  {
419  auto vec = internal::cl_vec_cpp2c(num_devices, devices);
420  ::cl_int status = CL_SUCCESS;
421  ::cl_context ptr = ::clCreateContext(properties.data(), num_devices,
422  vec.data(), pfn_notify, user_data, &status);
423  if (internal::cl_error_check(status, "CLContext::CLContext",
424  "::clCreateContext") == CL_SUCCESS) {
425  reset_ptr(ptr);
426  }
427  }
428 
430  CLContext(const CLContextProperties &properties,
431  ::cl_device_type device_type, pfn_notify_type pfn_notify = nullptr,
432  void *user_data = nullptr)
433  {
434  ::cl_int status = CL_SUCCESS;
435  ::cl_context ptr = ::clCreateContextFromType(
436  properties.data(), device_type, pfn_notify, user_data, &status);
437  if (internal::cl_error_check(status, "CLContext::CLContext",
438  "::clCreateContextFromType") == CL_SUCCESS) {
439  reset_ptr(ptr);
440  }
441  }
442 
444  std::vector<CLDevice> get_device() const
445  {
446  std::vector<::cl_device_id> vec;
447  if (get_info(CL_CONTEXT_DEVICES, vec) != CL_SUCCESS)
448  return std::vector<CLDevice>();
449 
450  return internal::cl_vec_c2cpp<CLDevice>(
451  static_cast<::cl_uint>(vec.size()), vec.data());
452  }
453 
455  VSMC_DEFINE_UTILITY_OPENCL_GET_INFO(Context, context, get, Context)
456 
457 
458  static ::cl_int release(::cl_context ptr)
459  {
460  if (ptr == nullptr)
461  return CL_SUCCESS;
462 
463  return internal::cl_error_check(::clReleaseContext(ptr),
464  "CLContext::release", "::clReleaseContext");
465  }
466 }; // class CLContext
467 
470 class CLEvent : public CLBase<::cl_event, CLEvent>
471 {
472  public:
473  explicit CLEvent(::cl_event ptr = nullptr) { reset_ptr(ptr); }
474 
476  explicit CLEvent(const CLContext &context)
477  {
478  ::cl_int status = CL_SUCCESS;
479  ::cl_event ptr = ::clCreateUserEvent(context.get(), &status);
480  if (internal::cl_error_check(status, "CLEvent::CLEvent",
481  "::clCreateUserEvent") == CL_SUCCESS) {
482  reset_ptr(ptr);
483  }
484  }
485 
487  ::cl_int set_status(::cl_int execution_status) const
488  {
490  ::clSetUserEventStatus(get(), execution_status),
491  "CLEvent::set_status", "::clSetUserEventStatus");
492  }
493 
496  {
497  ::cl_context ptr = nullptr;
498  return get_info(CL_EVENT_CONTEXT, ptr) == CL_SUCCESS ? CLContext(ptr) :
499  CLContext();
500  }
501 
503  inline CLCommandQueue get_command_queue() const;
504 
506  ::cl_int wait() const
507  {
508  ::cl_event ptr = get();
510  ::clWaitForEvents(1, &ptr), "CLEvent::wait", "::clWaitForEvents");
511  }
512 
514  static ::cl_int wait(::cl_uint num_events, CLEvent *events)
515  {
516  if (num_events == 0)
517  return CL_SUCCESS;
518 
519  auto vec = internal::cl_vec_cpp2c(num_events, events);
521  ::clWaitForEvents(num_events, vec.data()), "CLEvent::wait",
522  "::clWaitForEvents");
523  }
524 
526  VSMC_DEFINE_UTILITY_OPENCL_GET_INFO(Event, event, get, Event)
527 
528 
530  Event, profiling, get_profiling, EventProfiling)
531 
533  static ::cl_int release(::cl_event ptr)
534  {
535  if (ptr == nullptr)
536  return CL_SUCCESS;
537 
539  ::clReleaseEvent(ptr), "CLEvent::release", "::clReleaseEvent");
540  }
541 }; // class CLEvent
542 
545 class CLMemory : public CLBase<::cl_mem, CLMemory>
546 {
547  public:
548  explicit CLMemory(::cl_mem ptr = nullptr) { reset_ptr(ptr); }
549 
551  CLMemory(const CLContext &context, ::cl_mem_flags flags, std::size_t size,
552  void *host_ptr = nullptr)
553  {
554  ::cl_int status = CL_SUCCESS;
555  ::cl_mem ptr =
556  ::clCreateBuffer(context.get(), flags, size, host_ptr, &status);
557  if (internal::cl_error_check(status, "CLMemory::CLMemory",
558  "::clCreateBuffer") == CL_SUCCESS) {
559  reset_ptr(ptr);
560  }
561  }
562 
564  CLMemory sub_buffer(::cl_mem_flags flags,
565  ::cl_buffer_create_type buffer_create_type,
566  const void *buffer_create_info = nullptr)
567  {
568  ::cl_int status = CL_SUCCESS;
569  ::cl_mem ptr = ::clCreateSubBuffer(
570  get(), flags, buffer_create_type, buffer_create_info, &status);
571  return internal::cl_error_check(status, "CLMemory::sub_buffer",
572  "::clCreateSubBuffer") == CL_SUCCESS ?
573  CLMemory(ptr) :
574  CLMemory();
575  }
576 
578  VSMC_DEFINE_UTILITY_OPENCL_GET_INFO(Memory, mem, get, MemObject)
579 
580 
581  static ::cl_int release(::cl_mem ptr)
582  {
583  if (ptr == nullptr)
584  return CL_SUCCESS;
585 
586  return internal::cl_error_check(::clReleaseMemObject(ptr),
587  "CLMemory::release", "::clReleaseMemObject");
588  }
589 }; // class CLMemory
590 
593 class CLProgram : public CLBase<::cl_program, CLProgram>
594 {
595  public:
596  using pfn_notify_type = void(CL_CALLBACK *)(::cl_program, void *);
597 
598  explicit CLProgram(::cl_program ptr = nullptr) { reset_ptr(ptr); }
599 
602  const CLContext &context, ::cl_uint count, const std::string *strings)
603  {
604  std::vector<const char *> str;
605  std::vector<std::size_t> len;
606  for (::cl_uint i = 0; i != count; ++i) {
607  str.push_back(strings[i].c_str());
608  len.push_back(strings[i].size());
609  }
610 
611  ::cl_int status = CL_SUCCESS;
612  ::cl_program ptr = ::clCreateProgramWithSource(
613  context.get(), count, str.data(), len.data(), &status);
614  if (internal::cl_error_check(status, "CLProgram::CLProgram",
615  "::clCreateProgramWithSource") == CL_SUCCESS) {
616  reset_ptr(ptr);
617  }
618  }
619 
621  CLProgram(const CLContext &context, ::cl_uint num_devices,
622  const CLDevice *devices, const std::vector<unsigned char> *binaries)
623  {
624  auto vec = internal::cl_vec_cpp2c(num_devices, devices);
625  std::vector<const unsigned char *> bin;
626  std::vector<std::size_t> len;
627  for (::cl_uint i = 0; i != num_devices; ++i) {
628  bin.push_back(binaries[i].data());
629  len.push_back(binaries[i].size());
630  }
631 
632  ::cl_int status = CL_SUCCESS;
633  std::vector<::cl_int> binary_status(num_devices, CL_SUCCESS);
634  ::cl_program ptr =
635  ::clCreateProgramWithBinary(context.get(), num_devices, vec.data(),
636  len.data(), bin.data(), binary_status.data(), &status);
637  bool binary_success = true;
638  for (auto bs : binary_status) {
639  if (internal::cl_error_check(bs, "CLProgram::CLProgram",
640  "::clCreateProgramWithBinary") != CL_SUCCESS) {
641  binary_success = false;
642  break;
643  }
644  }
645  if (binary_success &&
646  internal::cl_error_check(status, "CLProgram::CLProgram",
647  "::clCreateProgramWithBinary") == CL_SUCCESS) {
648  reset_ptr(ptr);
649  }
650  }
651 
653  CLProgram(const CLContext &context, ::cl_uint num_devices,
654  const CLDevice *devices, const std::string &kernel_names)
655  {
656  auto vec = internal::cl_vec_cpp2c(num_devices, devices);
657  ::cl_int status = CL_SUCCESS;
658  ::cl_program ptr = ::clCreateProgramWithBuiltInKernels(context.get(),
659  num_devices, vec.data(), kernel_names.c_str(), &status);
660  if (internal::cl_error_check(status, "CLProgram::CLProgram",
661  "::clCreateProgramWithBuiltInKernels") == CL_SUCCESS) {
662  reset_ptr(ptr);
663  }
664  }
665 
667  CLProgram(const CLContext &context, ::cl_uint num_devices,
668  const CLDevice *devices, const std::string &options = std::string(),
669  ::cl_uint num_input_programs = 0,
670  const CLProgram *input_programs = nullptr,
671  pfn_notify_type pfn_notify = nullptr, void *user_data = nullptr)
672  {
673  auto dvec = internal::cl_vec_cpp2c(num_devices, devices);
674  auto pvec = internal::cl_vec_cpp2c(num_input_programs, input_programs);
675  ::cl_int status = CL_SUCCESS;
676  ::cl_program ptr = ::clLinkProgram(context.get(), num_devices,
677  dvec.data(), options.c_str(), num_input_programs, pvec.data(),
678  pfn_notify, user_data, &status);
679  if (internal::cl_error_check(status, "CLProgram::CLProgram",
680  "::clLinkProgram") == CL_SUCCESS) {
681  reset_ptr(ptr);
682  }
683  }
684 
686  ::cl_int build(::cl_uint num_devices, const CLDevice *devices,
687  const std::string &options = std::string(),
688  pfn_notify_type pfn_notify = nullptr, void *user_data = nullptr) const
689  {
690  auto vec = internal::cl_vec_cpp2c(num_devices, devices);
691  ::cl_int status = ::clBuildProgram(get(), num_devices, vec.data(),
692  options.c_str(), pfn_notify, user_data);
693 #if !VSMC_NO_RUNTIME_ASSERT
694  if (status != CL_SUCCESS) {
695  for (::cl_uint i = 0; i != num_devices; ++i) {
696  std::cerr << std::string(80, '=');
697  std::string name;
698  devices[i].get_info(CL_DEVICE_NAME, name);
699  std::cerr << "Device: " << name << std::endl;
700  std::cerr << std::string(80, '-');
701  std::cerr << "Status: ";
702  switch (build_status(devices[i])) {
703  case CL_BUILD_NONE:
704  std::cerr << "CL_BUILD_NONE" << std::endl;
705  break;
706  case CL_BUILD_ERROR:
707  std::cerr << "CL_BUILD_ERROR" << std::endl;
708  break;
709  case CL_BUILD_SUCCESS:
710  std::cerr << "CL_BUILD_SUCCESS" << std::endl;
711  break;
712  case CL_BUILD_IN_PROGRESS:
713  std::cerr << "CL_BUILD_IN_PROGRESS" << std::endl;
714  break;
715  default: break;
716  }
717  std::cerr << std::string(80, '-');
718  std::cerr << "Options: " << build_options(devices[i])
719  << std::endl;
720  std::cerr << std::string(80, '-');
721  std::cerr << build_log(devices[i]) << std::endl;
722  std::cerr << std::string(80, '-');
723  }
724  }
725 #endif
726 
728  status, "CLProgram::build", "::clBuildProgram");
729  }
730 
732  ::cl_int compile(::cl_uint num_devices, const CLDevice *devices,
733  const std::string &options = std::string(),
734  ::cl_uint num_input_headers = 0,
735  const CLProgram *input_headers = nullptr,
736  const std::string *header_include_names = nullptr,
737  pfn_notify_type pf_notify = nullptr, void *user_data = nullptr)
738  {
739  auto dvec = internal::cl_vec_cpp2c(num_devices, devices);
740  auto pvec = internal::cl_vec_cpp2c(num_input_headers, input_headers);
741  std::vector<const char *> inc_names;
742  for (::cl_uint i = 0; i != num_input_headers; ++i)
743  inc_names.push_back(header_include_names[i].c_str());
744 
746  ::clCompileProgram(get(), num_devices, dvec.data(),
747  options.c_str(), num_input_headers, pvec.data(),
748  inc_names.data(), pf_notify, user_data),
749  "CLProgram::compile", "::clCompileProgram");
750  }
751 
753  ::cl_build_status build_status(const CLDevice &device) const
754  {
755  ::cl_build_status v;
756  get_build_info(device, CL_PROGRAM_BUILD_STATUS, v);
757 
758  return v;
759  }
760 
762  std::string build_options(const CLDevice &device) const
763  {
764  std::string v;
765  get_build_info(device, CL_PROGRAM_BUILD_OPTIONS, v);
766 
767  return v;
768  }
769 
771  std::string build_log(const CLDevice &device) const
772  {
773  std::string v;
774  get_build_info(device, CL_PROGRAM_BUILD_LOG, v);
775 
776  return v;
777  }
778 
780  inline std::vector<CLKernel> create_kernels() const;
781 
784  {
785  ::cl_context ptr = nullptr;
786  return get_info(CL_PROGRAM_CONTEXT, ptr) == CL_SUCCESS ?
787  CLContext(ptr) :
788  CLContext();
789  }
790 
792  std::vector<CLDevice> get_device() const
793  {
794  std::vector<::cl_device_id> vec;
795  if (get_info(CL_PROGRAM_DEVICES, vec) != CL_SUCCESS)
796  return std::vector<CLDevice>();
797 
798  return internal::cl_vec_c2cpp<CLDevice>(
799  static_cast<::cl_uint>(vec.size()), vec.data());
800  }
801 
803  VSMC_DEFINE_UTILITY_OPENCL_GET_INFO(Program, program, get, Program)
804 
805 
806  ::cl_int get_build_info(const CLDevice &device,
807  ::cl_program_build_info param_name, std::size_t param_value_size,
808  void *param_value, std::size_t *param_value_size_ret) const
809  {
811  ::clGetProgramBuildInfo(get(), device.get(), param_name,
812  param_value_size, param_value, param_value_size_ret),
813  "CLKernel::get_build_info", "::clGetProgramBuildInfo");
814  }
815 
816  template <typename ParamType>
817  ::cl_int get_build_info(const CLDevice &device,
818  ::cl_program_build_info param_name, ParamType &param_value) const
819  {
820  ParamType v;
821  ::cl_int status =
822  get_build_info(device, param_name, sizeof(ParamType), &v, nullptr);
823  if (status == CL_SUCCESS)
824  param_value = v;
825 
826  return status;
827  }
828 
829  template <typename ParamType>
830  ::cl_int get_build_info(const CLDevice &device,
831  ::cl_program_build_info param_name,
832  std::vector<ParamType> &param_value) const
833  {
834  ::cl_int status = CL_SUCCESS;
835 
836  std::size_t param_value_size = 0;
837  status =
838  get_build_info(device, param_name, 0, nullptr, &param_value_size);
839  if (status != CL_SUCCESS)
840  return status;
841 
842  std::vector<ParamType> v(param_value_size / sizeof(ParamType));
843  status = get_build_info(
844  device, param_name, param_value_size, v.data(), nullptr);
845  if (status == CL_SUCCESS)
846  param_value = std::move(v);
847 
848  return status;
849  }
850 
851  ::cl_int get_build_info(const CLDevice &device,
852  ::cl_program_build_info param_name, std::string &param_value) const
853  {
854  std::vector<char> v;
855  ::cl_int status = get_build_info(device, param_name, v);
856  v.push_back('\0');
857  if (status == CL_SUCCESS)
858  param_value = static_cast<const char *>(v.data());
859 
860  return status;
861  }
862 
864  static ::cl_int release(::cl_program ptr)
865  {
866  if (ptr == nullptr)
867  return CL_SUCCESS;
868 
869  return internal::cl_error_check(::clReleaseProgram(ptr),
870  "CLProgram::release", "::clReleaseProgram");
871  }
872 }; // class CLProgram
873 
876 class CLKernel : public CLBase<::cl_kernel, CLKernel>
877 {
878  public:
879  explicit CLKernel(::cl_kernel ptr = nullptr) { reset_ptr(ptr); }
880 
882  CLKernel(const CLProgram &program, const std::string &kernel_name)
883  {
884  ::cl_int status = CL_SUCCESS;
885  ::cl_kernel ptr =
886  ::clCreateKernel(program.get(), kernel_name.c_str(), &status);
887  if (internal::cl_error_check(status, "CLKernel::CLKernel",
888  "::clCreateKernel") == CL_SUCCESS) {
889  reset_ptr(ptr);
890  }
891  }
892 
894  template <typename T>
895  ::cl_int set_arg(::cl_uint arg_index, const T &arg) const
896  {
898  ::clSetKernelArg(get(), arg_index, sizeof(T), &arg),
899  "CLKernel::set_arg", "::clSetKernelArgs");
900  }
901 
903  ::cl_int set_arg(::cl_uint arg_index, const CLMemory &arg) const
904  {
905  ::cl_mem mem = arg.get();
907  ::clSetKernelArg(get(), arg_index, sizeof(::cl_mem), &mem),
908  "CLKernel::set_arg", "::clSetKernelArgs");
909  }
910 
911  VSMC_DEFINE_UTILITY_OPENCL_GET_INFO(Kernel, kernel, get, Kernel)
912 
913 
914  ::cl_ulong get_work_group_info(const CLDevice &device,
915  ::cl_kernel_work_group_info param_name, std::size_t param_value_size,
916  void *param_value, std::size_t *param_value_size_ret) const
917  {
919  ::clGetKernelWorkGroupInfo(get(), device.get(), param_name,
920  param_value_size, param_value, param_value_size_ret),
921  "CLKernel::get_work_group_info", "::clGetKernelWorkGroupInfo");
922  }
923 
924  template <typename ParamType>
925  ::cl_int get_work_group_info(const CLDevice &device,
926  ::cl_kernel_work_group_info param_name, ParamType &param_value) const
927  {
928  ParamType v;
929  ::cl_int status = get_work_group_info(
930  device, param_name, sizeof(ParamType), &v, nullptr);
931  if (status == CL_SUCCESS)
932  param_value = v;
933 
934  return status;
935  }
936 
937  template <typename ParamType>
938  ::cl_int get_work_group_info(const CLDevice &device,
939  ::cl_kernel_work_group_info param_name,
940  std::vector<ParamType> &param_value) const
941  {
942  ::cl_int status = CL_SUCCESS;
943 
944  std::size_t param_value_size = 0;
945  status = get_work_group_info(
946  device, param_name, 0, nullptr, &param_value_size);
947  if (status != CL_SUCCESS)
948  return status;
949 
950  std::vector<ParamType> v(param_value_size / sizeof(ParamType));
951  status = get_work_group_info(
952  device, param_name, param_value_size, v.data(), nullptr);
953  if (status == CL_SUCCESS)
954  param_value = std::move(v);
955 
956  return status;
957  }
958 
959  ::cl_int get_work_group_info(const CLDevice &device,
960  ::cl_kernel_work_group_info param_name, std::string &param_value) const
961  {
962  std::vector<char> v;
963  ::cl_int status = get_work_group_info(device, param_name, v);
964  v.push_back('\0');
965  if (status == CL_SUCCESS)
966  param_value = static_cast<const char *>(v.data());
967 
968  return status;
969  }
970 
972  ::cl_ulong get_arg_info(::cl_uint arg_indx,
973  ::cl_kernel_arg_info param_name, std::size_t param_value_size,
974  void *param_value, std::size_t *param_value_size_ret) const
975  {
977  ::clGetKernelArgInfo(get(), arg_indx, param_name, param_value_size,
978  param_value, param_value_size_ret),
979  "CLKernel::get_arg_info", "::clGetKernelArgInfo");
980  }
981 
982  template <typename ParamType>
983  ::cl_int get_arg_info(::cl_uint arg_indx, ::cl_kernel_arg_info param_name,
984  ParamType &param_value) const
985  {
986  ParamType v;
987  ::cl_int status =
988  get_arg_info(arg_indx, param_name, sizeof(ParamType), &v, nullptr);
989  if (status == CL_SUCCESS)
990  param_value = v;
991 
992  return status;
993  }
994 
995  template <typename ParamType>
996  ::cl_int get_arg_info(::cl_uint arg_indx, ::cl_kernel_arg_info param_name,
997  std::vector<ParamType> &param_value) const
998  {
999  ::cl_int status = CL_SUCCESS;
1000 
1001  std::size_t param_value_size = 0;
1002  status =
1003  get_arg_info(arg_indx, param_name, 0, nullptr, &param_value_size);
1004  if (status != CL_SUCCESS)
1005  return status;
1006 
1007  std::vector<ParamType> v(param_value_size / sizeof(ParamType));
1008  status = get_arg_info(
1009  arg_indx, param_name, param_value_size, v.data(), nullptr);
1010  if (status == CL_SUCCESS)
1011  param_value = std::move(v);
1012 
1013  return status;
1014  }
1015 
1016  ::cl_int get_arg_info(::cl_uint arg_indx, ::cl_kernel_arg_info param_name,
1017  std::string &param_value) const
1018  {
1019  std::vector<char> v;
1020  ::cl_int status = get_arg_info(arg_indx, param_name, v);
1021  v.push_back('\0');
1022  if (status == CL_SUCCESS)
1023  param_value = static_cast<const char *>(v.data());
1024 
1025  return status;
1026  }
1027 
1029  static ::cl_int release(::cl_kernel ptr)
1030  {
1031  if (ptr == nullptr)
1032  return CL_SUCCESS;
1033 
1034  return internal::cl_error_check(
1035  ::clReleaseKernel(ptr), "CLKernel::release", "::clReleaseKernel");
1036  }
1037 }; // class CLKernel
1038 
1041 class CLCommandQueue : public CLBase<::cl_command_queue, CLCommandQueue>
1042 {
1043  public:
1044  explicit CLCommandQueue(::cl_command_queue ptr = nullptr)
1045  {
1046  reset_ptr(ptr);
1047  }
1048 
1050  CLCommandQueue(const CLContext &context, const CLDevice &device,
1051  ::cl_command_queue_properties properties = 0)
1052  {
1053  ::cl_int status = CL_SUCCESS;
1054  ::cl_command_queue ptr = ::clCreateCommandQueue(
1055  context.get(), device.get(), properties, &status);
1056  if (internal::cl_error_check(status, "CLCommandQueue::CLCommandQueue",
1057  "::clCreateCommandQueue") == CL_SUCCESS) {
1058  reset_ptr(ptr);
1059  }
1060  }
1061 
1064  {
1065  ::cl_context ptr = nullptr;
1066  return get_info(CL_QUEUE_CONTEXT, ptr) == CL_SUCCESS ? CLContext(ptr) :
1067  CLContext();
1068  }
1069 
1072  {
1073  ::cl_device_id ptr = nullptr;
1074  return get_info(CL_QUEUE_DEVICE, ptr) == CL_SUCCESS ? CLDevice(ptr) :
1075  CLDevice();
1076  }
1077 
1079  ::cl_int enqueue_nd_range_kernel(const CLKernel &kernel,
1080  ::cl_uint work_dim, const CLNDRange &global_work_offset,
1081  const CLNDRange &global_work_size, const CLNDRange &local_work_size,
1082  ::cl_uint num_events_in_wait_list = 0,
1083  const CLEvent *event_wait_list = nullptr,
1084  CLEvent *event = nullptr) const
1085  {
1086  auto eptrs =
1087  internal::cl_vec_cpp2c(num_events_in_wait_list, event_wait_list);
1088  ::cl_event eptr = nullptr;
1089 
1090  ::cl_int status = ::clEnqueueNDRangeKernel(get(), kernel.get(),
1091  work_dim, global_work_offset.data(), global_work_size.data(),
1092  local_work_size.data(), num_events_in_wait_list, eptrs.data(),
1093  &eptr);
1094  internal::cl_error_check(status,
1095  "CLCommandQueue::enqueue_nd_range_kernel",
1096  "::clEnqueueNDRangeKernel");
1097  if (status == CL_SUCCESS && event != nullptr)
1098  event->reset(eptr);
1099 
1100  return status;
1101  }
1102 
1104  ::cl_int enqueue_read_buffer(const CLMemory &buffer,
1105  ::cl_bool blocking_read, std::size_t offset, std::size_t size,
1106  void *ptr, ::cl_uint num_events_in_wait_list = 0,
1107  const CLEvent *event_wait_list = nullptr,
1108  CLEvent *event = nullptr) const
1109  {
1110  auto eptrs =
1111  internal::cl_vec_cpp2c(num_events_in_wait_list, event_wait_list);
1112  ::cl_event eptr = nullptr;
1113 
1114  ::cl_int status =
1115  ::clEnqueueReadBuffer(get(), buffer.get(), blocking_read, offset,
1116  size, ptr, num_events_in_wait_list, eptrs.data(), &eptr);
1117  internal::cl_error_check(status, "CLCommandQueue::enqueue_read_buffer",
1118  "::clEnqueueReadBuffer");
1119  if (status == CL_SUCCESS && event != nullptr)
1120  event->reset(eptr);
1121 
1122  return status;
1123  }
1124 
1126  ::cl_int enqueue_write_buffer(const CLMemory &buffer,
1127  ::cl_bool blocking_write, std::size_t offset, std::size_t size,
1128  void *ptr, ::cl_uint num_events_in_wait_list = 0,
1129  const CLEvent *event_wait_list = nullptr,
1130  CLEvent *event = nullptr) const
1131  {
1132  auto eptrs =
1133  internal::cl_vec_cpp2c(num_events_in_wait_list, event_wait_list);
1134  ::cl_event eptr = nullptr;
1135 
1136  ::cl_int status =
1137  ::clEnqueueWriteBuffer(get(), buffer.get(), blocking_write, offset,
1138  size, ptr, num_events_in_wait_list, eptrs.data(), &eptr);
1139  internal::cl_error_check(status,
1140  "CLCommandQueue::enqueue_write_buffer", "::clEnqueueWriteBuffer");
1141  if (status == CL_SUCCESS && event != nullptr)
1142  event->reset(eptr);
1143 
1144  return status;
1145  }
1146 
1148  ::cl_int enqueue_read_buffer_rect(const CLMemory &buffer,
1149  ::cl_bool blocking_read,
1150  const std::array<std::size_t, 3> &buffer_origin,
1151  const std::array<std::size_t, 3> &host_origin,
1152  const std::array<std::size_t, 3> &region, std::size_t buffer_row_pitch,
1153  std::size_t buffer_slice_pitch, std::size_t host_row_pitch,
1154  std::size_t host_slice_pitch, void *ptr,
1155  ::cl_uint num_events_in_wait_list = 0,
1156  const CLEvent *event_wait_list = nullptr,
1157  CLEvent *event = nullptr) const
1158  {
1159  auto eptrs =
1160  internal::cl_vec_cpp2c(num_events_in_wait_list, event_wait_list);
1161  ::cl_event eptr = nullptr;
1162 
1163  ::cl_int status = ::clEnqueueReadBufferRect(get(), buffer.get(),
1164  blocking_read, buffer_origin.data(), host_origin.data(),
1165  region.data(), buffer_row_pitch, buffer_slice_pitch,
1166  host_row_pitch, host_slice_pitch, ptr, num_events_in_wait_list,
1167  eptrs.data(), &eptr);
1168  internal::cl_error_check(status,
1169  "CLCommandQueue::enqueue_read_buffer_rect",
1170  "::clEnqueueReadBufferRect");
1171  if (status == CL_SUCCESS && event != nullptr)
1172  event->reset(eptr);
1173 
1174  return status;
1175  }
1176 
1178  ::cl_int enqueue_write_buffer_rect(const CLMemory &buffer,
1179  ::cl_bool blocking_write,
1180  const std::array<std::size_t, 3> &buffer_origin,
1181  const std::array<std::size_t, 3> &host_origin,
1182  const std::array<std::size_t, 3> &region, std::size_t buffer_row_pitch,
1183  std::size_t buffer_slice_pitch, std::size_t host_row_pitch,
1184  std::size_t host_slice_pitch, const void *ptr,
1185  ::cl_uint num_events_in_wait_list = 0,
1186  const CLEvent *event_wait_list = nullptr,
1187  CLEvent *event = nullptr) const
1188  {
1189  auto eptrs =
1190  internal::cl_vec_cpp2c(num_events_in_wait_list, event_wait_list);
1191  ::cl_event eptr = nullptr;
1192 
1193  ::cl_int status = ::clEnqueueWriteBufferRect(get(), buffer.get(),
1194  blocking_write, buffer_origin.data(), host_origin.data(),
1195  region.data(), buffer_row_pitch, buffer_slice_pitch,
1196  host_row_pitch, host_slice_pitch, ptr, num_events_in_wait_list,
1197  eptrs.data(), &eptr);
1198  internal::cl_error_check(status,
1199  "CLCommandQueue::enqueue_write_buffer_rect",
1200  "::clEnqueueWriteBufferRect");
1201  if (status == CL_SUCCESS && event != nullptr)
1202  event->reset(eptr);
1203 
1204  return status;
1205  }
1206 
1208  ::cl_int enqueue_copy_buffer(const CLMemory &src_buffer,
1209  const CLMemory &dst_buffer, std::size_t src_offset,
1210  std::size_t dst_offset, std::size_t size,
1211  ::cl_uint num_events_in_wait_list = 0,
1212  const CLEvent *event_wait_list = nullptr,
1213  CLEvent *event = nullptr) const
1214  {
1215  auto eptrs =
1216  internal::cl_vec_cpp2c(num_events_in_wait_list, event_wait_list);
1217  ::cl_event eptr = nullptr;
1218 
1219  ::cl_int status = ::clEnqueueCopyBuffer(get(), src_buffer.get(),
1220  dst_buffer.get(), src_offset, dst_offset, size,
1221  num_events_in_wait_list, eptrs.data(), &eptr);
1222  internal::cl_error_check(status,
1223  "CLCommandQeueue::enqueue_copy_buffer", "::clEnqueueCopyBuffer");
1224  if (status == CL_SUCCESS && event != nullptr)
1225  event->reset(eptr);
1226 
1227  return status;
1228  }
1229 
1231  ::cl_int enqueue_copy_buffer_rect(const CLMemory &src_buffer,
1232  const CLMemory &dst_buffer,
1233  const std::array<std::size_t, 3> &src_origin,
1234  const std::array<std::size_t, 3> &dst_origin,
1235  const std::array<std::size_t, 3> &region, std::size_t src_row_pitch,
1236  std::size_t src_slice_pitch, std::size_t dst_row_pitch,
1237  std::size_t dst_slice_pitch, ::cl_uint num_events_in_wait_list = 0,
1238  const CLEvent *event_wait_list = nullptr,
1239  CLEvent *event = nullptr) const
1240  {
1241  auto eptrs =
1242  internal::cl_vec_cpp2c(num_events_in_wait_list, event_wait_list);
1243  ::cl_event eptr = nullptr;
1244 
1245  ::cl_int status = ::clEnqueueCopyBufferRect(get(), src_buffer.get(),
1246  dst_buffer.get(), src_origin.data(), dst_origin.data(),
1247  region.data(), src_row_pitch, src_slice_pitch, dst_row_pitch,
1248  dst_slice_pitch, num_events_in_wait_list, eptrs.data(), &eptr);
1249  internal::cl_error_check(status,
1250  "CLCommandQueue::enqueue_copy_buffer_rect",
1251  "::clEnqueueCopyBufferRect");
1252  if (status == CL_SUCCESS && event != nullptr)
1253  event->reset(eptr);
1254 
1255  return status;
1256  }
1257 
1259  ::cl_int enqueue_fill_buffer(const CLMemory &buffer, const void *pattern,
1260  std::size_t pattern_size, std::size_t offset, std::size_t size,
1261  ::cl_uint num_events_in_wait_list = 0,
1262  const CLEvent *event_wait_list = nullptr,
1263  CLEvent *event = nullptr) const
1264  {
1265  auto eptrs =
1266  internal::cl_vec_cpp2c(num_events_in_wait_list, event_wait_list);
1267  ::cl_event eptr = nullptr;
1268 
1269  ::cl_int status =
1270  ::clEnqueueFillBuffer(get(), buffer.get(), pattern, pattern_size,
1271  offset, size, num_events_in_wait_list, eptrs.data(), &eptr);
1272  internal::cl_error_check(status, "CLCommandQueue::enqueue_fill_buffer",
1273  "::clEnqueueFillBuffer");
1274  if (status == CL_SUCCESS && event != nullptr)
1275  event->reset(eptr);
1276 
1277  return status;
1278  }
1279 
1281  void *enqueue_map_buffer(const CLMemory &buffer, ::cl_bool blocking_map,
1282  ::cl_map_flags map_flags, std::size_t offset, std::size_t size,
1283  ::cl_uint num_events_in_wait_list = 0,
1284  const CLEvent *event_wait_list = nullptr,
1285  CLEvent *event = nullptr) const
1286  {
1287  auto eptrs =
1288  internal::cl_vec_cpp2c(num_events_in_wait_list, event_wait_list);
1289  ::cl_event eptr = nullptr;
1290 
1291  ::cl_int status = CL_SUCCESS;
1292  void *ptr = ::clEnqueueMapBuffer(get(), buffer.get(), blocking_map,
1293  map_flags, offset, size, num_events_in_wait_list, eptrs.data(),
1294  &eptr, &status);
1295  internal::cl_error_check(status, "CLCommandQueue::enqueue_map_buffer",
1296  "::clEnqueueMapBuffer");
1297  if (status == CL_SUCCESS && event != nullptr)
1298  event->reset(eptr);
1299 
1300  return ptr;
1301  }
1302 
1304  ::cl_int enqueue_unmap_mem_object(const CLMemory &memobj, void *mapped_ptr,
1305  ::cl_uint num_events_in_wait_list = 0,
1306  const CLEvent *event_wait_list = nullptr,
1307  CLEvent *event = nullptr) const
1308  {
1309  auto eptrs =
1310  internal::cl_vec_cpp2c(num_events_in_wait_list, event_wait_list);
1311  ::cl_event eptr = nullptr;
1312 
1313  ::cl_int status = ::clEnqueueUnmapMemObject(get(), memobj.get(),
1314  mapped_ptr, num_events_in_wait_list, eptrs.data(), &eptr);
1315  internal::cl_error_check(status,
1316  "CLCommandQueue::enqueue_unmap_mem_object",
1317  "::clEnqueueUnmapMemObject");
1318  if (status == CL_SUCCESS && event != nullptr)
1319  event->reset(eptr);
1320 
1321  return status;
1322  }
1323 
1325  ::cl_int enqueue_migrate_mem_objects(::cl_uint num_mem_objects,
1326  const CLMemory *mem_objects, ::cl_mem_migration_flags flags,
1327  ::cl_uint num_events_in_wait_list = 0,
1328  const CLEvent *event_wait_list = nullptr,
1329  CLEvent *event = nullptr) const
1330  {
1331  auto mptrs = internal::cl_vec_cpp2c(num_mem_objects, mem_objects);
1332  auto eptrs =
1333  internal::cl_vec_cpp2c(num_events_in_wait_list, event_wait_list);
1334  ::cl_event eptr = nullptr;
1335 
1336  ::cl_int status = ::clEnqueueMigrateMemObjects(get(), num_mem_objects,
1337  mptrs.data(), flags, num_events_in_wait_list, eptrs.data(), &eptr);
1338  internal::cl_error_check(status,
1339  "CLCommandQueue::enqueue_migrate_mem_objects",
1340  "::clEnqueueMigrateMemObjects");
1341  if (status == CL_SUCCESS && event != nullptr)
1342  event->reset(eptr);
1343 
1344  return status;
1345  }
1346 
1349  ::cl_uint num_events_in_wait_list = 0,
1350  const CLEvent *event_wait_list = nullptr,
1351  CLEvent *event = nullptr) const
1352  {
1353  auto eptrs =
1354  internal::cl_vec_cpp2c(num_events_in_wait_list, event_wait_list);
1355  ::cl_event eptr = nullptr;
1356 
1357  ::cl_int status = ::clEnqueueMarkerWithWaitList(
1358  get(), num_events_in_wait_list, eptrs.data(), &eptr);
1359  internal::cl_error_check(status,
1360  "CLCommandQueue::enqueue_marker_with_wait_list",
1361  "::clEnqueueMarkerWithWaitList");
1362  if (status == CL_SUCCESS && event != nullptr)
1363  event->reset(eptr);
1364 
1365  return status;
1366  }
1367 
1370  ::cl_uint num_events_in_wait_list = 0,
1371  const CLEvent *event_wait_list = nullptr,
1372  CLEvent *event = nullptr) const
1373  {
1374  auto eptrs =
1375  internal::cl_vec_cpp2c(num_events_in_wait_list, event_wait_list);
1376  ::cl_event eptr = nullptr;
1377 
1378  ::cl_int status = ::clEnqueueBarrierWithWaitList(
1379  get(), num_events_in_wait_list, eptrs.data(), &eptr);
1380  internal::cl_error_check(status,
1381  "CLCommandQueue::enqueue_barrier_with_wait_list",
1382  "::clEnqueueBarrierWithWaitList");
1383  if (status == CL_SUCCESS && event != nullptr)
1384  event->reset(eptr);
1385 
1386  return status;
1387  }
1388 
1390  ::cl_int flush() const
1391  {
1392  return internal::cl_error_check(
1393  ::clFlush(get()), "CLCommandQueue::flush", "::clFlush");
1394  }
1395 
1397  ::cl_int finish() const
1398  {
1399  return internal::cl_error_check(
1400  ::clFinish(get()), "CLCommandQueue::finish", "::clFinish");
1401  }
1402 
1405  CommandQueue, command_queue, get, CommandQueue)
1406 
1407 
1408  static ::cl_int release(::cl_command_queue ptr)
1409  {
1410  if (ptr == nullptr)
1411  return CL_SUCCESS;
1412 
1413  ::cl_int status = ::clReleaseCommandQueue(ptr);
1415  status, "CLCommandQueue::release", "::clReleaseCommandQueue");
1416 
1417  return status;
1418  }
1419 }; // class CLCommandQueue
1420 
1422 {
1423  ::cl_command_queue ptr = nullptr;
1424  return get_info(CL_EVENT_COMMAND_QUEUE, ptr) == CL_SUCCESS ?
1425  CLCommandQueue(ptr) :
1426  CLCommandQueue();
1427 }
1428 
1429 inline std::vector<CLKernel> CLProgram::create_kernels() const
1430 {
1431  ::cl_int status = CL_SUCCESS;
1432 
1433  ::cl_uint n = 0;
1435  ::clCreateKernelsInProgram(get(), 0, nullptr, &n),
1436  "CLProgram::create_kernels",
1437  "::clCreateKernelsInProgram") != CL_SUCCESS) {
1438  return std::vector<CLKernel>();
1439  }
1440 
1441  std::vector<::cl_kernel> vec(n);
1443  ::clCreateKernelsInProgram(get(), n, vec.data(), nullptr),
1444  "CLProgram::create_kernels",
1445  "::clCreateKernelsInProgram") != CL_SUCCESS) {
1446  return std::vector<CLKernel>();
1447  }
1448 
1449  return internal::cl_vec_c2cpp<CLKernel>(n, vec.data());
1450 }
1451 
1452 } // namespace vsmc
1453 
1454 #ifdef VSMC_CLANG
1455 #pragma clang diagnostic pop
1456 #endif
1457 
1458 #ifdef VSMC_GCC
1459 #pragma GCC diagnostic pop
1460 #endif
1461 
1462 #ifdef VSMC_INTEL
1463 #pragma warning(pop)
1464 #endif
1465 
1466 #endif // VSMC_UTILITY_OPENCL_HPP
void reset(pointer ptr)
Definition: opencl.hpp:197
OpenCL cl_kernel
Definition: opencl.hpp:876
CLMemory(const CLContext &context,::cl_mem_flags flags, std::size_t size, void *host_ptr=nullptr)
clCreateBuffer
Definition: opencl.hpp:551
::cl_int enqueue_copy_buffer(const CLMemory &src_buffer, const CLMemory &dst_buffer, std::size_t src_offset, std::size_t dst_offset, std::size_t size,::cl_uint num_events_in_wait_list=0, const CLEvent *event_wait_list=nullptr, CLEvent *event=nullptr) const
clEnqueueCopyBuffer
Definition: opencl.hpp:1208
Definition: monitor.hpp:48
bool unique() const
Definition: opencl.hpp:209
::cl_int get_work_group_info(const CLDevice &device,::cl_kernel_work_group_info param_name, ParamType &param_value) const
Definition: opencl.hpp:925
::cl_int enqueue_read_buffer(const CLMemory &buffer,::cl_bool blocking_read, std::size_t offset, std::size_t size, void *ptr,::cl_uint num_events_in_wait_list=0, const CLEvent *event_wait_list=nullptr, CLEvent *event=nullptr) const
clEnqueueReadBuffer
Definition: opencl.hpp:1104
CLEvent(const CLContext &context)
clCreateUserEvent
Definition: opencl.hpp:476
std::vector< CLPlatform > cl_get_platform()
clGetPlatformIDs
Definition: opencl.hpp:361
CLMemory(::cl_mem ptr=nullptr)
Definition: opencl.hpp:548
OpenCL cl_program
Definition: opencl.hpp:593
CLKernel(::cl_kernel ptr=nullptr)
Definition: opencl.hpp:879
::cl_int get_arg_info(::cl_uint arg_indx,::cl_kernel_arg_info param_name, std::vector< ParamType > &param_value) const
Definition: opencl.hpp:996
::cl_ulong get_arg_info(::cl_uint arg_indx,::cl_kernel_arg_info param_name, std::size_t param_value_size, void *param_value, std::size_t *param_value_size_ret) const
clGetKernelArgInfo
Definition: opencl.hpp:972
::cl_int enqueue_write_buffer(const CLMemory &buffer,::cl_bool blocking_write, std::size_t offset, std::size_t size, void *ptr,::cl_uint num_events_in_wait_list=0, const CLEvent *event_wait_list=nullptr, CLEvent *event=nullptr) const
clEnqueueWriteBuffer
Definition: opencl.hpp:1126
std::vector< typename CLType::pointer > cl_vec_cpp2c(::cl_uint n, const CLType *ptr)
Definition: opencl.hpp:152
CLContext get_context() const
CL_EVENT_CONTEXT
Definition: opencl.hpp:495
::cl_int finish() const
clFinish
Definition: opencl.hpp:1397
::cl_int enqueue_fill_buffer(const CLMemory &buffer, const void *pattern, std::size_t pattern_size, std::size_t offset, std::size_t size,::cl_uint num_events_in_wait_list=0, const CLEvent *event_wait_list=nullptr, CLEvent *event=nullptr) const
clEnqueueFillBuffer
Definition: opencl.hpp:1259
::cl_build_status build_status(const CLDevice &device) const
CL_PROGRAM_BUILD_STATUS
Definition: opencl.hpp:753
CLNDRange(std::size_t x, std::size_t y)
Definition: opencl.hpp:256
::cl_int wait() const
clWaitForEvents
Definition: opencl.hpp:506
CLNDRange(std::size_t x)
Definition: opencl.hpp:254
pointer get() const
Definition: opencl.hpp:205
::cl_int flush() const
clFlush
Definition: opencl.hpp:1390
const std::size_t * data() const
Definition: opencl.hpp:265
::cl_int enqueue_read_buffer_rect(const CLMemory &buffer,::cl_bool blocking_read, const std::array< std::size_t, 3 > &buffer_origin, const std::array< std::size_t, 3 > &host_origin, const std::array< std::size_t, 3 > &region, std::size_t buffer_row_pitch, std::size_t buffer_slice_pitch, std::size_t host_row_pitch, std::size_t host_slice_pitch, void *ptr,::cl_uint num_events_in_wait_list=0, const CLEvent *event_wait_list=nullptr, CLEvent *event=nullptr) const
clEnqueueReadBufferRect
Definition: opencl.hpp:1148
#define VSMC_RUNTIME_ASSERT(cond, msg)
Definition: assert.hpp:59
OpenCL cl_context
Definition: opencl.hpp:406
::cl_int compile(::cl_uint num_devices, const CLDevice *devices, const std::string &options=std::string(),::cl_uint num_input_headers=0, const CLProgram *input_headers=nullptr, const std::string *header_include_names=nullptr, pfn_notify_type pf_notify=nullptr, void *user_data=nullptr)
clCompileProgram
Definition: opencl.hpp:732
STL namespace.
void reset_ptr(pointer ptr)
Definition: opencl.hpp:214
OpenCL resource management base class.
Definition: opencl.hpp:189
long use_count() const
Definition: opencl.hpp:207
CLContext(::cl_context ptr=nullptr)
Definition: opencl.hpp:412
CLNDRange(std::size_t x, std::size_t y, std::size_t z)
Definition: opencl.hpp:258
::cl_int set_arg(::cl_uint arg_index, const CLMemory &arg) const
clSetKernelArg
Definition: opencl.hpp:903
CLProgram(const CLContext &context,::cl_uint count, const std::string *strings)
clCreateProgramWithSource
Definition: opencl.hpp:601
::cl_int enqueue_migrate_mem_objects(::cl_uint num_mem_objects, const CLMemory *mem_objects,::cl_mem_migration_flags flags,::cl_uint num_events_in_wait_list=0, const CLEvent *event_wait_list=nullptr, CLEvent *event=nullptr) const
clEnqueueMigrateMemObjects
Definition: opencl.hpp:1325
bool operator!=(const SingleParticle< T > &sp1, const SingleParticle< T > &sp2)
CLDevice(::cl_device_id ptr=nullptr)
Definition: opencl.hpp:280
::cl_int build(::cl_uint num_devices, const CLDevice *devices, const std::string &options=std::string(), pfn_notify_type pfn_notify=nullptr, void *user_data=nullptr) const
clBuildProgram
Definition: opencl.hpp:686
CLPlatform(::cl_platform_id ptr=nullptr)
Definition: opencl.hpp:324
::cl_int enqueue_copy_buffer_rect(const CLMemory &src_buffer, const CLMemory &dst_buffer, const std::array< std::size_t, 3 > &src_origin, const std::array< std::size_t, 3 > &dst_origin, const std::array< std::size_t, 3 > &region, std::size_t src_row_pitch, std::size_t src_slice_pitch, std::size_t dst_row_pitch, std::size_t dst_slice_pitch,::cl_uint num_events_in_wait_list=0, const CLEvent *event_wait_list=nullptr, CLEvent *event=nullptr) const
clEnqueueCopyBufferRect
Definition: opencl.hpp:1231
std::string build_log(const CLDevice &device) const
CL_PROGRAM_BUILD_LOG
Definition: opencl.hpp:771
CLContext get_context() const
CL_PROGRAM_CONTEXT
Definition: opencl.hpp:783
std::vector< CLType > cl_vec_c2cpp(::cl_uint n, const typename CLType::pointer *ptr)
Definition: opencl.hpp:163
::cl_int wait(::cl_uint num_events, CLEvent *events)
clWaitForEvents
Definition: opencl.hpp:514
CLContext get_context() const
CL_QUEUE_CONTEXT
Definition: opencl.hpp:1063
void * enqueue_map_buffer(const CLMemory &buffer,::cl_bool blocking_map,::cl_map_flags map_flags, std::size_t offset, std::size_t size,::cl_uint num_events_in_wait_list=0, const CLEvent *event_wait_list=nullptr, CLEvent *event=nullptr) const
clEnqueueMapBuffer
Definition: opencl.hpp:1281
::cl_int set_status(::cl_int execution_status) const
clSetUserEventStatus
Definition: opencl.hpp:487
OpenCL cl_device_id
Definition: opencl.hpp:277
inline::cl_int cl_error_check(::cl_int status, const char *cpp, const char *c)
Definition: opencl.hpp:130
CLCommandQueue(::cl_command_queue ptr=nullptr)
Definition: opencl.hpp:1044
::cl_int enqueue_barrier_with_wait_list(::cl_uint num_events_in_wait_list=0, const CLEvent *event_wait_list=nullptr, CLEvent *event=nullptr) const
clEnqueueBarrierWithWaitList
Definition: opencl.hpp:1369
::cl_int enqueue_nd_range_kernel(const CLKernel &kernel,::cl_uint work_dim, const CLNDRange &global_work_offset, const CLNDRange &global_work_size, const CLNDRange &local_work_size,::cl_uint num_events_in_wait_list=0, const CLEvent *event_wait_list=nullptr, CLEvent *event=nullptr) const
clEnqueueNDRangeKernel
Definition: opencl.hpp:1079
::cl_int enqueue_marker_with_wait_list(::cl_uint num_events_in_wait_list=0, const CLEvent *event_wait_list=nullptr, CLEvent *event=nullptr) const
clEnqueueMarkerWithWaitList
Definition: opencl.hpp:1348
std::vector< CLDevice > creat_sub_devices(const ::cl_device_partition_property *properties) const
clCreateSubDevices
Definition: opencl.hpp:283
OpenCL cl_context_properties
Definition: opencl.hpp:380
CLKernel(const CLProgram &program, const std::string &kernel_name)
clCreateKernel
Definition: opencl.hpp:882
std::vector< CLDevice > get_device() const
CL_PROGRAM_DEVICES
Definition: opencl.hpp:792
::cl_int get_info(::cl_device_info param_name, std::size_t param_value_size, void *param_value, std::size_t *param_value_size_ret) const
clGetDeviceInfo
Definition: opencl.hpp:306
::cl_int get_build_info(const CLDevice &device,::cl_program_build_info param_name, std::vector< ParamType > &param_value) const
Definition: opencl.hpp:830
bool operator==(const SingleParticle< T > &sp1, const SingleParticle< T > &sp2)
OpenCL NDRange concept.
Definition: opencl.hpp:249
::cl_int set_arg(::cl_uint arg_index, const T &arg) const
clSetKernelArg
Definition: opencl.hpp:895
CLProgram(const CLContext &context,::cl_uint num_devices, const CLDevice *devices, const std::vector< unsigned char > *binaries)
clCreateProgramWithBinary
Definition: opencl.hpp:621
std::string build_options(const CLDevice &device) const
CL_PROGRAM_BUILD_OPTIONS
Definition: opencl.hpp:762
void(CL_CALLBACK *)(const char *, const void *, std::size_t, void *) pfn_notify_type
Definition: opencl.hpp:410
::cl_int enqueue_unmap_mem_object(const CLMemory &memobj, void *mapped_ptr,::cl_uint num_events_in_wait_list=0, const CLEvent *event_wait_list=nullptr, CLEvent *event=nullptr) const
clEnqueueUnmapMemObject
Definition: opencl.hpp:1304
CLContextProperties(const CLPlatform &platform,::cl_bool interop_user_sync)
Definition: opencl.hpp:390
CLContext(const CLContextProperties &properties,::cl_device_type device_type, pfn_notify_type pfn_notify=nullptr, void *user_data=nullptr)
clCreateContextFromType
Definition: opencl.hpp:430
CLCommandQueue get_command_queue() const
CL_EVENT_COMMAND_QUEUE
Definition: opencl.hpp:1421
CLProgram(::cl_program ptr=nullptr)
Definition: opencl.hpp:598
std::vector< CLDevice > get_device(::cl_device_type device_type) const
clGetDeviceIDs
Definition: opencl.hpp:327
OpenCL cl_command_queue
Definition: opencl.hpp:1041
::cl_int release(::cl_program ptr)
clReleaseProgram
Definition: opencl.hpp:864
CLMemory sub_buffer(::cl_mem_flags flags,::cl_buffer_create_type buffer_create_type, const void *buffer_create_info=nullptr)
clCreateSubBuffer
Definition: opencl.hpp:564
OpenCL cl_mem
Definition: opencl.hpp:545
OpenCL cl_platform_id
Definition: opencl.hpp:321
::cl_int get_arg_info(::cl_uint arg_indx,::cl_kernel_arg_info param_name, std::string &param_value) const
Definition: opencl.hpp:1016
::cl_int get_build_info(const CLDevice &device,::cl_program_build_info param_name, ParamType &param_value) const
Definition: opencl.hpp:817
::cl_int get_build_info(const CLDevice &device,::cl_program_build_info param_name, std::string &param_value) const
Definition: opencl.hpp:851
const ::cl_context_properties * data() const
Definition: opencl.hpp:398
std::vector< CLDevice > get_device() const
CL_CONTEXT_DEVICES
Definition: opencl.hpp:444
CLEvent(::cl_event ptr=nullptr)
Definition: opencl.hpp:473
void(CL_CALLBACK *)(::cl_program, void *) pfn_notify_type
Definition: opencl.hpp:596
CLProgram(const CLContext &context,::cl_uint num_devices, const CLDevice *devices, const std::string &kernel_names)
clCreateProgramWithBuiltInKernels
Definition: opencl.hpp:653
::cl_int release(::cl_kernel ptr)
clReleaseKernel
Definition: opencl.hpp:1029
CLContext(const CLContextProperties &properties,::cl_uint num_devices, const CLDevice *devices, pfn_notify_type pfn_notify=nullptr, void *user_data=nullptr)
clCreateContext
Definition: opencl.hpp:415
CLCommandQueue(const CLContext &context, const CLDevice &device,::cl_command_queue_properties properties=0)
clCreateCommandQueue
Definition: opencl.hpp:1050
void swap(CLBase< CLPtr, Derived > &other)
Definition: opencl.hpp:203
CLDevice get_device() const
CL_QUEUE_DEVICE
Definition: opencl.hpp:1071
typename std::remove_pointer< ::cl_kernel >::type element_type
Definition: opencl.hpp:193
::cl_int enqueue_write_buffer_rect(const CLMemory &buffer,::cl_bool blocking_write, const std::array< std::size_t, 3 > &buffer_origin, const std::array< std::size_t, 3 > &host_origin, const std::array< std::size_t, 3 > &region, std::size_t buffer_row_pitch, std::size_t buffer_slice_pitch, std::size_t host_row_pitch, std::size_t host_slice_pitch, const void *ptr,::cl_uint num_events_in_wait_list=0, const CLEvent *event_wait_list=nullptr, CLEvent *event=nullptr) const
clEnqueueWriteBufferRect
Definition: opencl.hpp:1178
::cl_int get_work_group_info(const CLDevice &device,::cl_kernel_work_group_info param_name, std::string &param_value) const
Definition: opencl.hpp:959
CLProgram(const CLContext &context,::cl_uint num_devices, const CLDevice *devices, const std::string &options=std::string(),::cl_uint num_input_programs=0, const CLProgram *input_programs=nullptr, pfn_notify_type pfn_notify=nullptr, void *user_data=nullptr)
clLinkProgram
Definition: opencl.hpp:667
std::vector< CLKernel > create_kernels() const
clCreateKernelsInProgram
Definition: opencl.hpp:1429
#define VSMC_DEFINE_UTILITY_OPENCL_GET_INFO(Class, type, name, Name)
Definition: opencl.hpp:62
::cl_int get_work_group_info(const CLDevice &device,::cl_kernel_work_group_info param_name, std::vector< ParamType > &param_value) const
Definition: opencl.hpp:938
CLContextProperties(const CLPlatform &platform)
Definition: opencl.hpp:383
std::size_t dim() const
Definition: opencl.hpp:263
::cl_int get_arg_info(::cl_uint arg_indx,::cl_kernel_arg_info param_name, ParamType &param_value) const
Definition: opencl.hpp:983
void swap(StateMatrixBase< Layout, Dim, T > &state1, StateMatrixBase< Layout, Dim, T > &state2) noexcept
Swap two StateMatrixBase objects.
OpenCL cl_event
Definition: opencl.hpp:470
::cl_int unload_compiler() const
clUnloadPlatformCompiler
Definition: opencl.hpp:347