vSMC
vSMC: Scalable Monte Carlo
cl_manip.hpp
Go to the documentation of this file.
1 //============================================================================
2 // vSMC/include/vsmc/opencl/cl_manip.hpp
3 //----------------------------------------------------------------------------
4 // vSMC: Scalable Monte Carlo
5 //----------------------------------------------------------------------------
6 // Copyright (c) 2013,2014, Yan Zhou
7 // All rights reserved.
8 //
9 // Redistribution and use in source and binary forms, with or without
10 // modification, are permitted provided that the following conditions are met:
11 //
12 // Redistributions of source code must retain the above copyright notice,
13 // this list of conditions and the following disclaimer.
14 //
15 // Redistributions in binary form must reproduce the above copyright notice,
16 // this list of conditions and the following disclaimer in the documentation
17 // and/or other materials provided with the distribution.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 // POSSIBILITY OF SUCH DAMAGE.
30 //============================================================================
31 
32 #ifndef VSMC_OPENCL_CL_MANIP_HPP
33 #define VSMC_OPENCL_CL_MANIP_HPP
34 
35 #include <vsmc/internal/common.hpp>
37 
38 namespace vsmc {
39 
48 inline void cl_minmax_local_size (
49  const ::cl::Kernel &kern, const ::cl::Device &dev,
50  std::size_t &factor, std::size_t &lmax, std::size_t &mmax)
51 {
52  try {
53  kern.getWorkGroupInfo(dev,
54  CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, &factor);
55  kern.getWorkGroupInfo(dev,
56  CL_KERNEL_WORK_GROUP_SIZE, &lmax);
57  if (factor == 0 || factor > lmax) {
58  factor = lmax = mmax = 0;
59  return;
60  }
61  mmax = lmax / factor;
62  } catch (const ::cl::Error &) {
63  factor = lmax = mmax = 0;
64  }
65 }
66 
/// \brief The minimum global size that is a multiple of the local size
///
/// \param N The number of work items required
/// \param local_size The local size; zero means no rounding is applied
///
/// \return `N` itself if `local_size` is zero or `N` is already a multiple of
/// `local_size`; otherwise `N` rounded up to the next multiple of
/// `local_size`.
///
/// \note No overflow check is performed. If the rounded-up value does not fit
/// in `std::size_t`, the result wraps around.
inline std::size_t cl_min_global_size (std::size_t N, std::size_t local_size)
{
    if (local_size == 0)
        return N;

    // local_size is known to be non-zero here, so only the remainder matters
    const std::size_t remainder = N % local_size;

    return remainder == 0 ? N : N + (local_size - remainder);
}
77 
82 inline std::size_t cl_preferred_work_size (std::size_t N,
83  const ::cl::Kernel &kern, const ::cl::Device &dev,
84  std::size_t &global_size, std::size_t &local_size)
85 {
86  cl::size_t<3> reqd_size;
87  try {
88  kern.getWorkGroupInfo(dev,
89  CL_KERNEL_COMPILE_WORK_GROUP_SIZE, &reqd_size);
90  } catch (const ::cl::Error &) {
91  reqd_size[0] = 0;
92  }
93 
94  if (reqd_size[0] != 0) {
95  local_size = reqd_size[0];
96  global_size = cl_min_global_size(N, local_size);
97 
98  return global_size - N;
99  }
100 
101  std::size_t factor;
102  std::size_t lmax;
103  std::size_t mmax;
104  cl_minmax_local_size(kern, dev, factor, lmax, mmax);
105  if (lmax == 0) {
106  global_size = N;
107  local_size = 0;
108 
109  return global_size - N;
110  }
111 
112  local_size = lmax;
113  global_size = cl_min_global_size(N, local_size);
114  std::size_t diff_size = global_size - N;
115  for (std::size_t m = mmax; m >= 1; --m) {
116  std::size_t l = m * factor;
117  std::size_t g = cl_min_global_size(N, l);
118  std::size_t d = g - N;
119  if (d < diff_size) {
120  local_size = l;
121  global_size = g;
122  diff_size = d;
123  }
124  }
125 
126  return diff_size;
127 }
128 
129 inline void cl_set_kernel_args (::cl::Kernel &, ::cl_uint) {}
130 
131 #if VSMC_HAS_CXX11_VARIADIC_TEMPLATES
132 template <typename Arg1, typename... Args>
142 inline void cl_set_kernel_args (::cl::Kernel &kern, ::cl_uint offset,
143  const Arg1 &arg1, const Args &... args)
144 {
145  kern.setArg(offset, arg1);
146  cl_set_kernel_args(kern, offset + 1, args...);
147 }
148 #else // VSMC_HAS_CXX11_VARIADIC_TEMPLATES
150 #endif // VSMC_HAS_CXX11_VARIADIC_TEMPLATES
151 
152 } // namespace vsmc
153 
154 #endif // VSMC_OPENCL_CL_MANIP_HPP
Definition: adapter.hpp:37
void cl_minmax_local_size(const ::cl::Kernel &kern, const ::cl::Device &dev, std::size_t &factor, std::size_t &lmax, std::size_t &mmax)
Query the preferred factor of local size.
Definition: cl_manip.hpp:48
std::size_t cl_min_global_size(std::size_t N, std::size_t local_size)
The minimum global size that is a multiple of the local size.
Definition: cl_manip.hpp:69
void cl_set_kernel_args(::cl::Kernel &,::cl_uint)
Definition: cl_manip.hpp:129
std::size_t cl_preferred_work_size(std::size_t N, const ::cl::Kernel &kern, const ::cl::Device &dev, std::size_t &global_size, std::size_t &local_size)
The preferred global and local size.
Definition: cl_manip.hpp:82