45 #define __CL_ENABLE_EXCEPTIONS
47 #undef __CL_ENABLE_EXCEPTIONS
49 #include <pvsutil/Assert.h>
50 #include <pvsutil/Logger.h>
52 #include "SkelCL/detail/Device.h"
54 #include "SkelCL/detail/DeviceBuffer.h"
58 #ifndef NDEBUG // DEBUG build
// Helper that renders a cl::NDRange as a std::string for log output.
// It follows the `#ifndef NDEBUG` guard, so it appears to be compiled only
// in debug builds. The sizes are read through the range's conversion to
// `const size_t*` and all `range.dimensions()` entries are visited.
// NOTE(review): the last entry is special-cased (presumably to avoid a
// trailing separator); the lines that emit the text are not visible here.
60 std::string printNDRange(
const cl::NDRange& range)
63 const size_t* sizes = range;
65 for (
size_t i = 0; i < range.dimensions(); ++i) {
67 if (i != range.dimensions()-1) {
// Callback with the signature OpenCL expects for event notifications.
// `userData` is cast back to a `std::function<void()>*`; Device::enqueue
// passes a heap-allocated copy of the user callback through this pointer.
// A status other than CL_COMPLETE is reported through LOG_ERROR.
// NOTE(review): presumably the function object is invoked and deleted
// here; those lines are not visible in this listing, so confirm.
77 void invokeCallback(cl_event , cl_int status,
void * userData)
79 auto callback =
static_cast<std::function<void()>*
>(userData);
83 if (status != CL_COMPLETE) {
84 LOG_ERROR(
"Event returned with abnormal status (", cl::Error(status),
")");
// Constructs a Device wrapper around an OpenCL device.
//   device: the cl::Device to wrap (stored in _device)
//   id:     identifier stored in _id
// _context and _commandQueue are default-constructed in the initializer
// list and then assigned below: a context is created for this single
// device (the properties array carries the platform handle), followed by
// a command queue on that context. Any cl::Error thrown while doing so is
// passed to ABORT_WITH_ERROR. Finally the device name is logged.
94 Device::Device(
const cl::Device&
device,
96 const Device::id_type
id)
97 : _device(device), _context(), _commandQueue(), _id(id)
// The context constructor takes a vector of devices; wrap the single one.
100 VECTOR_CLASS<cl::Device> devices(1, _device);
103 cl_context_properties props[] = {
105 reinterpret_cast<cl_context_properties
>( (
platform)() ),
108 _context = cl::Context(devices, props);
111 _commandQueue = cl::CommandQueue(_context, _device);
112 }
catch (cl::Error& err) {
113 ABORT_WITH_ERROR(err);
116 LOG_INFO(
"Using device `", name(),
// Enqueues `kernel` on this device's command queue and flushes the queue.
//   kernel:   kernel to launch
//   global:   global work size
//   local:    work-group size; must have the same number of dimensions as
//             `global`, and every global size must be a multiple of the
//             matching local size (both checked with ASSERT)
//   offset:   global work offset
//   callback: optional function; when it is non-empty, a heap-allocated
//             copy is registered on the resulting event for CL_COMPLETE
//             and handed to ::invokeCallback as user data
// A cl::Error thrown while enqueueing or flushing goes to ABORT_WITH_ERROR.
// NOTE(review): the return statement is not visible in this listing;
// presumably the event associated with the launch is returned.
120 cl::Event Device::enqueue(
const cl::Kernel& kernel,
121 const cl::NDRange& global,
122 const cl::NDRange&
local,
123 const cl::NDRange& offset,
124 const std::function<
void()> callback)
const
126 ASSERT(global.dimensions() == local.dimensions());
// Indexing the NDRanges triggers -Wsign-conversion, so it is silenced
// locally for the divisibility check.
127 #pragma GCC diagnostic push
128 #pragma GCC diagnostic ignored "-Wsign-conversion"
130 auto globalSizeIsDivisiableByLocalSize = [&] () ->
bool {
131 bool isDivisiable =
true;
132 for (
size_t i = 0; i < global.dimensions(); ++i) {
134 if (global[i] % local[i] != 0) { isDivisiable =
false;
break; }
138 #pragma GCC diagnostic pop
139 ASSERT(globalSizeIsDivisiableByLocalSize());
143 _commandQueue.enqueueNDRangeKernel(kernel, offset, global, local,
145 _commandQueue.flush();
146 }
catch (cl::Error& err) {
147 ABORT_WITH_ERROR(err);
152 if (callback !=
nullptr) {
// The C-style callback API only carries a void*, so the std::function is
// copied to the heap and passed through as user data.
155 auto userData =
static_cast<void*
>(
new std::function<void()>(callback));
156 event.setCallback(CL_COMPLETE, ::invokeCallback, userData);
159 LOG_DEBUG_INFO(
"Kernel for device ", _id,
" enqueued with global range: ",
160 ::printNDRange(global),
", local: ", ::printNDRange(local),
161 ", offset: ", ::printNDRange(offset));
// Enqueues a write of `buffer.sizeInBytes()` bytes from host memory into
// `buffer` and flushes the command queue.
//   buffer:      destination device buffer
//   hostPointer: base address of the host data
//   hostOffset:  offset into the host data, counted in elements (it is
//                multiplied by buffer.elemSize() to get a byte offset)
// A cl::Error is passed to ABORT_WITH_ERROR. The transfer parameters are
// logged with LOG_DEBUG_INFO.
// NOTE(review): some argument lines and the return statement are not
// visible in this listing.
165 cl::Event Device::enqueueWrite(
const DeviceBuffer& buffer,
166 const void* hostPointer,
167 size_t hostOffset)
const
171 _commandQueue.enqueueWriteBuffer(buffer.clBuffer(),
174 buffer.sizeInBytes(),
// Pointer arithmetic is done on char* so the offset is applied in bytes.
175 static_cast<const void*
>(
176 static_cast<const char*
>(
178 +(hostOffset * buffer.elemSize() ) ),
181 _commandQueue.flush();
182 }
catch (cl::Error& err) {
183 ABORT_WITH_ERROR(err);
186 LOG_DEBUG_INFO(
"Enqueued write buffer for device ", _id,
187 " (size: ", buffer.sizeInBytes(),
188 ", clBuffer: ", buffer.clBuffer()(),
190 ", hostPointer: ", static_cast<const void*>(
191 static_cast<const char*>(hostPointer)
192 + (hostOffset * buffer.elemSize()) ),
193 ", hostOffset: ", hostOffset*buffer.elemSize() ,
")");
// Enqueues a partial write from host memory into `buffer` and flushes the
// command queue.
//   buffer:      destination device buffer
//   hostPointer: base address of the host data
//   hostOffset:  offset into the host data, in elements
// The body also uses `size` and `deviceOffset`; both are multiplied by
// buffer.elemSize(), so they are element counts. Their parameter
// declarations are not visible in this listing.
// A cl::Error is passed to ABORT_WITH_ERROR. The transfer parameters are
// logged with LOG_DEBUG_INFO.
197 cl::Event Device::enqueueWrite(
const DeviceBuffer& buffer,
198 void*
const hostPointer,
201 size_t hostOffset)
const
205 _commandQueue.enqueueWriteBuffer(buffer.clBuffer(),
207 (deviceOffset * buffer.elemSize()),
208 size * buffer.elemSize(),
// Pointer arithmetic is done on char* so the offset is applied in bytes.
209 static_cast<void*const
>(
210 static_cast<char*const
>(
212 +(hostOffset * buffer.elemSize() ) ),
215 _commandQueue.flush();
216 }
catch (cl::Error& err) {
217 ABORT_WITH_ERROR(err);
220 LOG_DEBUG_INFO(
"Enqueued write buffer for device ", _id,
221 " (size: ", size * buffer.elemSize(),
222 ", clBuffer: ", buffer.clBuffer()(),
223 ", deviceOffset: ", deviceOffset*buffer.elemSize(),
224 ", hostPointer: ",
static_cast<void*const
>(
225 static_cast<char*const
>(hostPointer)
226 + (hostOffset * buffer.elemSize()) ),
227 ", hostOffset: ", hostOffset*buffer.elemSize() ,
")");
// Enqueues a read of `buffer.sizeInBytes()` bytes from `buffer` into host
// memory and flushes the command queue.
//   buffer:     source device buffer
//   hostOffset: offset into the host memory, counted in elements (it is
//               multiplied by buffer.elemSize() to get a byte offset)
// The body also uses `hostPointer` as the base host address; its parameter
// declaration is not visible in this listing.
// A cl::Error is passed to ABORT_WITH_ERROR. The transfer parameters are
// logged with LOG_DEBUG_INFO.
231 cl::Event Device::enqueueRead(
const DeviceBuffer& buffer,
233 size_t hostOffset)
const
237 _commandQueue.enqueueReadBuffer(buffer.clBuffer(),
240 buffer.sizeInBytes(),
244 +(hostOffset * buffer.elemSize()) ),
247 _commandQueue.flush();
248 }
catch (cl::Error& err) {
249 ABORT_WITH_ERROR(err);
252 LOG_DEBUG_INFO(
"Enqueued read buffer for device ", _id,
253 " (size: ", buffer.sizeInBytes(),
254 ", clBuffer: ", buffer.clBuffer()(),
256 ", hostPointer: ", static_cast<void*>(
257 static_cast<char*const>(hostPointer)
258 + (hostOffset * buffer.elemSize()) ),
259 ", hostOffset: ", hostOffset * buffer.elemSize() ,
")");
// Enqueues a partial read from `buffer` into host memory and flushes the
// command queue.
//   buffer:      source device buffer
//   hostPointer: base address of the host destination
//   hostOffset:  offset into the host memory, in elements
// The body also uses `size` and `deviceOffset`; both are multiplied by
// buffer.elemSize(), so they are element counts. Their parameter
// declarations are not visible in this listing.
// A cl::Error is passed to ABORT_WITH_ERROR. The transfer parameters are
// logged with LOG_DEBUG_INFO.
263 cl::Event Device::enqueueRead(
const DeviceBuffer& buffer,
264 void*
const hostPointer,
267 size_t hostOffset)
const
271 _commandQueue.enqueueReadBuffer(buffer.clBuffer(),
273 deviceOffset * buffer.elemSize(),
274 size * buffer.elemSize(),
// Pointer arithmetic is done on char* so the offset is applied in bytes.
275 static_cast<void*const
>(
276 static_cast<char*const
>(
278 +(hostOffset * buffer.elemSize()) ),
281 _commandQueue.flush();
282 }
catch (cl::Error& err) {
283 ABORT_WITH_ERROR(err);
286 LOG_DEBUG_INFO(
"Enqueued read buffer for device ", _id,
287 " (size: ", size * buffer.elemSize(),
288 ", clBuffer: ", buffer.clBuffer()(),
289 ", deviceOffset: ", deviceOffset * buffer.elemSize(),
290 ", hostPointer: ",
static_cast<void*
>(
291 static_cast<char*const
>(hostPointer)
292 + (hostOffset * buffer.elemSize()) ),
293 ", hostOffset: ", hostOffset * buffer.elemSize() ,
")");
// Enqueues a device-to-device copy from `from` to `to` and flushes the
// command queue.
//   from:     source device buffer
//   to:       destination device buffer
//   toOffset: offset into the destination
// The body also uses `fromOffset`; its parameter declaration is not
// visible in this listing. Both offsets are subtracted directly from
// sizeInBytes(), so they are treated as byte counts here (unlike the
// element offsets of the read/write functions).
// The number of bytes copied is `from.sizeInBytes() - fromOffset`; an
// ASSERT checks that this fits into the space left in `to` after toOffset.
// NOTE(review): the subtractions look like unsigned arithmetic; an offset
// larger than the buffer size would wrap rather than go negative. Confirm
// that callers guarantee the offsets stay within the buffers.
// A cl::Error is passed to ABORT_WITH_ERROR.
297 cl::Event Device::enqueueCopy(
const DeviceBuffer& from,
298 const DeviceBuffer& to,
300 size_t toOffset)
const
302 ASSERT( (from.sizeInBytes() - fromOffset)
303 <= (to.sizeInBytes() - toOffset) );
306 _commandQueue.enqueueCopyBuffer(from.clBuffer(),
310 from.sizeInBytes() - fromOffset,
313 _commandQueue.flush();
314 }
catch (cl::Error& err) {
315 ABORT_WITH_ERROR(err);
318 LOG_DEBUG_INFO(
"Enqueued copy buffer for device ", _id,
319 " (from: ", from.clBuffer()(),
320 ", to: ", to.clBuffer()(),
321 ", size: ", from.sizeInBytes() - fromOffset,
322 ", fromOffset: ", fromOffset,
323 ", toOffset: ", toOffset,
")");
// Calls finish() on this device's command queue, logging a debug message
// before and after the call. A cl::Error thrown by finish() is passed to
// ABORT_WITH_ERROR.
328 void Device::wait()
const
330 LOG_DEBUG_INFO(
"Start waiting for device with id: ", _id);
332 _commandQueue.finish();
333 }
catch (cl::Error& err) {
334 ABORT_WITH_ERROR(err);
336 LOG_DEBUG_INFO(
"Finished waiting for device with id: ", _id);
// Accessor for this device's identifier.
// NOTE(review): the body is not visible in this listing; presumably it
// returns the _id member set by the constructor.
339 Device::id_type Device::id()
const
// Returns true when the device's CL_DEVICE_TYPE, as reported by OpenCL,
// compares equal to `t`.
344 bool Device::isType(Type t)
const
346 return _device.getInfo<CL_DEVICE_TYPE>() == t;
// Returns the device name reported by OpenCL (CL_DEVICE_NAME).
349 std::string Device::name()
const
351 return _device.getInfo<CL_DEVICE_NAME>();
// Returns the vendor name reported by OpenCL (CL_DEVICE_VENDOR).
354 std::string Device::vendorName()
const
356 return _device.getInfo<CL_DEVICE_VENDOR>();
// Returns the value OpenCL reports for CL_DEVICE_MAX_CLOCK_FREQUENCY.
359 unsigned int Device::maxClockFrequency()
const
361 return _device.getInfo<CL_DEVICE_MAX_CLOCK_FREQUENCY>();
// Returns the value OpenCL reports for CL_DEVICE_MAX_COMPUTE_UNITS.
364 unsigned int Device::maxComputeUnits()
const
366 return _device.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>();
// Returns the value OpenCL reports for CL_DEVICE_MAX_WORK_GROUP_SIZE.
369 size_t Device::maxWorkGroupSize()
const
371 return _device.getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>();
// Computes the first entry of CL_DEVICE_MAX_WORK_ITEM_SIZES divided by
// maxWorkGroupSize().
// NOTE(review): the `return` keyword sits on a line that is not visible in
// this listing; presumably this quotient is the returned value.
374 size_t Device::maxWorkGroups()
const
377 _device.getInfo<CL_DEVICE_MAX_WORK_ITEM_SIZES>()[0] / maxWorkGroupSize();
// Returns the value OpenCL reports for CL_DEVICE_GLOBAL_MEM_SIZE.
380 unsigned long Device::globalMemSize()
const
382 return _device.getInfo<CL_DEVICE_GLOBAL_MEM_SIZE>();
// Returns the value OpenCL reports for CL_DEVICE_LOCAL_MEM_SIZE.
385 unsigned long Device::localMemSize()
const
387 return _device.getInfo<CL_DEVICE_LOCAL_MEM_SIZE>();
// Accessor returning a const reference to a cl::Context.
// NOTE(review): the body is not visible in this listing; presumably it
// returns the _context member created in the constructor.
390 const cl::Context& Device::clContext()
const
// Accessor returning a const reference to a cl::Device.
// NOTE(review): the body is not visible in this listing; presumably it
// returns the wrapped _device member.
395 const cl::Device& Device::clDevice()
const
// Returns true when the device's extension string (CL_DEVICE_EXTENSIONS)
// contains "cl_khr_fp64".
400 bool Device::supportsDouble()
const
402 std::string extensions = _device.getInfo<CL_DEVICE_EXTENSIONS>();
403 return (extensions.find(
"cl_khr_fp64") != std::string::npos);
// Stream extraction for Device::Type. The string `s` is matched exactly
// (case-sensitive) against "ALL", "ANY", "CPU", "GPU", "ACCELERATOR" and
// "DEFAULT", and `type` is set to the corresponding enumerator.
// Throws std::invalid_argument when `s` matches none of them.
// NOTE(review): the lines that declare and read `s`, and the return
// statement, are not visible in this listing; presumably `s` is extracted
// from `stream`.
406 std::istream& operator>>(std::istream& stream, Device::Type& type)
411 if (s ==
"ALL") type = Device::Type::ALL;
412 else if (s ==
"ANY") type = Device::Type::ANY;
413 else if (s ==
"CPU") type = Device::Type::CPU;
414 else if (s ==
"GPU") type = Device::Type::GPU;
415 else if (s ==
"ACCELERATOR") type = Device::Type::ACCELERATOR;
416 else if (s ==
"DEFAULT") type = Device::Type::DEFAULT;
417 else throw std::invalid_argument(
418 "Could not parse (" + s +
") as Device::Type.");
// Stream insertion for Device::Type: writes "ANY", "CPU", "GPU",
// "ACCELERATOR" or "DEFAULT" for the matching enumerator and returns the
// stream. Throws std::logic_error when none of the comparisons match.
// NOTE(review): no branch for Device::Type::ALL is visible, although
// operator>> accepts "ALL". If ALL is a distinct value from ANY, streaming
// it would reach the throw; confirm against the enum definition.
423 std::ostream& operator<<(std::ostream& stream,
const Device::Type& type)
425 if (type == Device::Type::ANY)
return stream <<
"ANY";
426 if (type == Device::Type::CPU)
return stream <<
"CPU";
427 if (type == Device::Type::GPU)
return stream <<
"GPU";
428 if (type == Device::Type::ACCELERATOR)
return stream <<
"ACCELERATOR";
429 if (type == Device::Type::DEFAULT)
return stream <<
"DEFAULT";
431 throw std::logic_error(
"This point should never be reached.");
SKELCL_DLL detail::DeviceID device(size_t dID)
Creates an OpenCL device ID to be used as a parameter of the init(detail::PlatformID, detail::DeviceID) function.
SKELCL_DLL Local local(size_t sizeInBytes)
Helper function to easily create a Local object with a given size.
SKELCL_DLL detail::PlatformID platform(size_t pID)
Creates an OpenCL platform ID to be used as a parameter of the init(detail::PlatformID, detail::DeviceID) function.