SkelCL
SkelCL is a high-level multi-GPU skeleton library developed at the University of Münster, Germany.
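It provides algorithmic skeletons (e.g. Map, Zip, Reduce, Scan) together with container types whose data transfers between host and device memory are managed automatically. The following is a minimal usage sketch, not part of this file, assuming the Map skeleton and Vector container of the public SkelCL API (the customizing function is passed as OpenCL C source and must be named func; exact headers and constructor signatures may differ between versions):

#include <SkelCL/SkelCL.h>
#include <SkelCL/Map.h>
#include <SkelCL/Vector.h>

int main() {
  skelcl::init(); // initialize SkelCL on the available OpenCL devices

  // Map skeleton customized with an OpenCL C function named "func"
  skelcl::Map<float(float)> square("float func(float x) { return x * x; }");

  skelcl::Vector<float> input(1024, 2.0f);      // 1024 elements, all 2.0
  skelcl::Vector<float> output = square(input); // transfers handled by SkelCL

  skelcl::terminate();
  return 0;
}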
Device.cpp
/*****************************************************************************
 * Copyright (c) 2011-2012 The SkelCL Team as listed in CREDITS.txt
 * http://skelcl.uni-muenster.de
 *
 * This file is part of SkelCL.
 * SkelCL is available under multiple licenses.
 * The different licenses are subject to terms and condition as provided
 * in the files specifying the license. See "LICENSE.txt" for details
 *
 *****************************************************************************
 *
 * SkelCL is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version. See "LICENSE-gpl.txt" for details.
 *
 * SkelCL is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 *****************************************************************************
 *
 * For non-commercial academic use see the license specified in the file
 * "LICENSE-academic.txt".
 *
 *****************************************************************************
 *
 * If you are interested in other licensing models, including a commercial-
 * license, please contact the author at michel.steuwer@uni-muenster.de
 *
 *****************************************************************************/

#include <functional>
#include <stdexcept>
#include <sstream>
#include <string>

#define __CL_ENABLE_EXCEPTIONS
#include <CL/cl.hpp>
#undef __CL_ENABLE_EXCEPTIONS

#include <pvsutil/Assert.h>
#include <pvsutil/Logger.h>

#include "SkelCL/detail/Device.h"

#include "SkelCL/detail/DeviceBuffer.h"
namespace {

#ifndef NDEBUG // DEBUG build

std::string printNDRange(const cl::NDRange& range)
{
  std::stringstream s;
  const size_t* sizes = range;
  s << "{ ";
  for (size_t i = 0; i < range.dimensions(); ++i) {
    s << sizes[i];
    if (i != range.dimensions()-1) {
      s << ", ";
    }
  }
  s << " }";
  return s.str();
}

#endif

void invokeCallback(cl_event /*event*/, cl_int status, void* userData)
{
  auto callback = static_cast<std::function<void()>*>(userData);
  (*callback)(); // invoke callback
  delete callback;

  if (status != CL_COMPLETE) {
    LOG_ERROR("Event returned with abnormal status (", cl::Error(status), ")");
  }
}

} // namespace

namespace skelcl {

namespace detail {

Device::Device(const cl::Device& device,
               const cl::Platform& platform,
               const Device::id_type id)
  : _device(device), _context(), _commandQueue(), _id(id)
{
  try {
    VECTOR_CLASS<cl::Device> devices(1, _device);

    // create separate context for every device
    cl_context_properties props[] = {
      CL_CONTEXT_PLATFORM,
      reinterpret_cast<cl_context_properties>( (platform)() ),
      0
    };
    _context = cl::Context(devices, props);

    // create command queue for every device
    _commandQueue = cl::CommandQueue(_context, _device);
  } catch (cl::Error& err) {
    ABORT_WITH_ERROR(err);
  }

  LOG_INFO("Using device `", name(),
           "' with id: ", _id);
}

cl::Event Device::enqueue(const cl::Kernel& kernel,
                          const cl::NDRange& global,
                          const cl::NDRange& local,
                          const cl::NDRange& offset,
                          const std::function<void()> callback) const
{
  ASSERT(global.dimensions() == local.dimensions());
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wsign-conversion"
  ONLY_IN_DEBUG(
    auto globalSizeIsDivisiableByLocalSize = [&] () -> bool {
      bool isDivisiable = true;
      for (size_t i = 0; i < global.dimensions(); ++i) {
        // TODO: Figure out why there is a size_t => long conversion here
        if (global[i] % local[i] != 0) { isDivisiable = false; break; }
      }
      return isDivisiable;
    });
#pragma GCC diagnostic pop
  ASSERT(globalSizeIsDivisiableByLocalSize());

  cl::Event event;
  try {
    _commandQueue.enqueueNDRangeKernel(kernel, offset, global, local,
                                       NULL, &event);
    _commandQueue.flush(); // always start calculation right away
  } catch (cl::Error& err) {
    ABORT_WITH_ERROR(err);
  }

  // if callback is given, register the function to be called after the kernel
  // has finished
  if (callback != nullptr) {
    // copy function object to be used as user data
    // the pointer is deleted inside the invokeCallback wrapper function
    auto userData = static_cast<void*>(new std::function<void()>(callback));
    event.setCallback(CL_COMPLETE, ::invokeCallback, userData);
  }

  LOG_DEBUG_INFO("Kernel for device ", _id, " enqueued with global range: ",
                 ::printNDRange(global), ", local: ", ::printNDRange(local),
                 ", offset: ", ::printNDRange(offset));
  return event;
}

cl::Event Device::enqueueWrite(const DeviceBuffer& buffer,
                               const void* hostPointer,
                               size_t hostOffset) const
{
  cl::Event event;
  try {
    _commandQueue.enqueueWriteBuffer(buffer.clBuffer(),
                                     CL_FALSE,
                                     0,
                                     buffer.sizeInBytes(),
                                     static_cast<const void*>(
                                       static_cast<const char*>(hostPointer)
                                       + (hostOffset * buffer.elemSize()) ),
                                     NULL,
                                     &event);
    _commandQueue.flush(); // always start operation right away
  } catch (cl::Error& err) {
    ABORT_WITH_ERROR(err);
  }

  LOG_DEBUG_INFO("Enqueued write buffer for device ", _id,
                 " (size: ", buffer.sizeInBytes(),
                 ", clBuffer: ", buffer.clBuffer()(),
                 ", deviceOffset: 0",
                 ", hostPointer: ", static_cast<const void*>(
                     static_cast<const char*>(hostPointer)
                     + (hostOffset * buffer.elemSize()) ),
                 ", hostOffset: ", hostOffset * buffer.elemSize(), ")");
  return event;
}

cl::Event Device::enqueueWrite(const DeviceBuffer& buffer,
                               void* const hostPointer,
                               size_t size,
                               size_t deviceOffset,
                               size_t hostOffset) const
{
  cl::Event event;
  try {
    _commandQueue.enqueueWriteBuffer(buffer.clBuffer(),
                                     CL_FALSE,
                                     (deviceOffset * buffer.elemSize()),
                                     size * buffer.elemSize(),
                                     static_cast<void* const>(
                                       static_cast<char* const>(hostPointer)
                                       + (hostOffset * buffer.elemSize()) ),
                                     NULL,
                                     &event);
    _commandQueue.flush(); // always start operation right away
  } catch (cl::Error& err) {
    ABORT_WITH_ERROR(err);
  }

  LOG_DEBUG_INFO("Enqueued write buffer for device ", _id,
                 " (size: ", size * buffer.elemSize(),
                 ", clBuffer: ", buffer.clBuffer()(),
                 ", deviceOffset: ", deviceOffset * buffer.elemSize(),
                 ", hostPointer: ", static_cast<void* const>(
                     static_cast<char* const>(hostPointer)
                     + (hostOffset * buffer.elemSize()) ),
                 ", hostOffset: ", hostOffset * buffer.elemSize(), ")");
  return event;
}

cl::Event Device::enqueueRead(const DeviceBuffer& buffer,
                              void* hostPointer,
                              size_t hostOffset) const
{
  cl::Event event;
  try {
    _commandQueue.enqueueReadBuffer(buffer.clBuffer(),
                                    CL_FALSE,
                                    0,
                                    buffer.sizeInBytes(),
                                    static_cast<void*>(
                                      static_cast<char*>(hostPointer)
                                      + (hostOffset * buffer.elemSize()) ),
                                    NULL,
                                    &event);
    _commandQueue.flush(); // always start operation right away
  } catch (cl::Error& err) {
    ABORT_WITH_ERROR(err);
  }

  LOG_DEBUG_INFO("Enqueued read buffer for device ", _id,
                 " (size: ", buffer.sizeInBytes(),
                 ", clBuffer: ", buffer.clBuffer()(),
                 ", deviceOffset: 0",
                 ", hostPointer: ", static_cast<void*>(
                     static_cast<char* const>(hostPointer)
                     + (hostOffset * buffer.elemSize()) ),
                 ", hostOffset: ", hostOffset * buffer.elemSize(), ")");
  return event;
}

cl::Event Device::enqueueRead(const DeviceBuffer& buffer,
                              void* const hostPointer,
                              size_t size,
                              size_t deviceOffset,
                              size_t hostOffset) const
{
  cl::Event event;
  try {
    _commandQueue.enqueueReadBuffer(buffer.clBuffer(),
                                    CL_FALSE,
                                    deviceOffset * buffer.elemSize(),
                                    size * buffer.elemSize(),
                                    static_cast<void* const>(
                                      static_cast<char* const>(hostPointer)
                                      + (hostOffset * buffer.elemSize()) ),
                                    NULL,
                                    &event);
    _commandQueue.flush(); // always start operation right away
  } catch (cl::Error& err) {
    ABORT_WITH_ERROR(err);
  }

  LOG_DEBUG_INFO("Enqueued read buffer for device ", _id,
                 " (size: ", size * buffer.elemSize(),
                 ", clBuffer: ", buffer.clBuffer()(),
                 ", deviceOffset: ", deviceOffset * buffer.elemSize(),
                 ", hostPointer: ", static_cast<void*>(
                     static_cast<char* const>(hostPointer)
                     + (hostOffset * buffer.elemSize()) ),
                 ", hostOffset: ", hostOffset * buffer.elemSize(), ")");
  return event;
}

cl::Event Device::enqueueCopy(const DeviceBuffer& from,
                              const DeviceBuffer& to,
                              size_t fromOffset,
                              size_t toOffset) const
{
  ASSERT( (from.sizeInBytes() - fromOffset)
          <= (to.sizeInBytes() - toOffset) );
  cl::Event event;
  try {
    _commandQueue.enqueueCopyBuffer(from.clBuffer(),
                                    to.clBuffer(),
                                    fromOffset,
                                    toOffset,
                                    from.sizeInBytes() - fromOffset,
                                    NULL,
                                    &event);
    _commandQueue.flush(); // always start operation right away
  } catch (cl::Error& err) {
    ABORT_WITH_ERROR(err);
  }

  LOG_DEBUG_INFO("Enqueued copy buffer for device ", _id,
                 " (from: ", from.clBuffer()(),
                 ", to: ", to.clBuffer()(),
                 ", size: ", from.sizeInBytes() - fromOffset,
                 ", fromOffset: ", fromOffset,
                 ", toOffset: ", toOffset, ")");

  return event;
}

void Device::wait() const
{
  LOG_DEBUG_INFO("Start waiting for device with id: ", _id);
  try {
    _commandQueue.finish();
  } catch (cl::Error& err) {
    ABORT_WITH_ERROR(err);
  }
  LOG_DEBUG_INFO("Finished waiting for device with id: ", _id);
}

Device::id_type Device::id() const
{
  return _id;
}

bool Device::isType(Type t) const
{
  return _device.getInfo<CL_DEVICE_TYPE>() == t;
}

std::string Device::name() const
{
  return _device.getInfo<CL_DEVICE_NAME>();
}

std::string Device::vendorName() const
{
  return _device.getInfo<CL_DEVICE_VENDOR>();
}

unsigned int Device::maxClockFrequency() const
{
  return _device.getInfo<CL_DEVICE_MAX_CLOCK_FREQUENCY>();
}

unsigned int Device::maxComputeUnits() const
{
  return _device.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>();
}

size_t Device::maxWorkGroupSize() const
{
  return _device.getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>();
}

size_t Device::maxWorkGroups() const
{
  return
    _device.getInfo<CL_DEVICE_MAX_WORK_ITEM_SIZES>()[0] / maxWorkGroupSize();
}

unsigned long Device::globalMemSize() const
{
  return _device.getInfo<CL_DEVICE_GLOBAL_MEM_SIZE>();
}

unsigned long Device::localMemSize() const
{
  return _device.getInfo<CL_DEVICE_LOCAL_MEM_SIZE>();
}

const cl::Context& Device::clContext() const
{
  return _context;
}

const cl::Device& Device::clDevice() const
{
  return _device;
}

bool Device::supportsDouble() const
{
  std::string extensions = _device.getInfo<CL_DEVICE_EXTENSIONS>();
  return (extensions.find("cl_khr_fp64") != std::string::npos);
}

std::istream& operator>>(std::istream& stream, Device::Type& type)
{
  std::string s;
  stream >> s;

  if (s == "ALL") type = Device::Type::ALL;
  else if (s == "ANY") type = Device::Type::ANY;
  else if (s == "CPU") type = Device::Type::CPU;
  else if (s == "GPU") type = Device::Type::GPU;
  else if (s == "ACCELERATOR") type = Device::Type::ACCELERATOR;
  else if (s == "DEFAULT") type = Device::Type::DEFAULT;
  else throw std::invalid_argument(
         "Could not parse (" + s + ") as Device::Type.");

  return stream;
}

std::ostream& operator<<(std::ostream& stream, const Device::Type& type)
{
  if (type == Device::Type::ANY) return stream << "ANY";
  if (type == Device::Type::CPU) return stream << "CPU";
  if (type == Device::Type::GPU) return stream << "GPU";
  if (type == Device::Type::ACCELERATOR) return stream << "ACCELERATOR";
  if (type == Device::Type::DEFAULT) return stream << "DEFAULT";

  throw std::logic_error("This point should never be reached.");
}

} // namespace detail

} // namespace skelcl
Referenced functions:

SKELCL_DLL detail::DeviceID device(size_t dID)
    Creates an OpenCL device ID to be used as a parameter of the init(detail::PlatformID, detail::DeviceID) function. Definition: SkelCL.cpp:76

SKELCL_DLL Local local(size_t sizeInBytes)
    Helper function to easily create a Local object with a given size. Definition: Local.cpp:54

SKELCL_DLL detail::PlatformID platform(size_t pID)
    Creates an OpenCL platform ID to be used as a parameter of the init(detail::PlatformID, detail::DeviceID) function. Definition: SkelCL.cpp:71
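For reference, platform() and device() are typically combined when SkelCL should be initialized on one specific OpenCL device rather than on all available ones. A minimal sketch, not part of this file, assuming the init(detail::PlatformID, detail::DeviceID) overload referenced above and that both helpers are declared in the SkelCL.h header:

#include <SkelCL/SkelCL.h>

int main() {
  // initialize SkelCL on device 0 of platform 0 only
  skelcl::init(skelcl::platform(0), skelcl::device(0));

  // ... use skeletons and containers here; work is enqueued on the
  //     skelcl::detail::Device wrapping that OpenCL device ...

  skelcl::terminate();
  return 0;
}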