From 941754d4f017c18de7a6e82ed0655ad18f2366eb Mon Sep 17 00:00:00 2001 From: MitchellHansen Date: Tue, 4 Apr 2017 02:21:45 -0700 Subject: [PATCH] More cleaning, small issue with copy by value sprites and textures. CL is spitting out errors that it isn't supposed to be able to spit out --- include/OpenCL.h | 72 ++++++--------- src/OpenCL.cpp | 231 ++++++++++++++++++++++++++++------------------- src/main.cpp | 77 +++++----------- 3 files changed, 191 insertions(+), 189 deletions(-) diff --git a/include/OpenCL.h b/include/OpenCL.h index 3818e4d..a65347a 100644 --- a/include/OpenCL.h +++ b/include/OpenCL.h @@ -32,7 +32,7 @@ class OpenCL { public: - OpenCL(sf::Vector2i resolution); + OpenCL(); ~OpenCL(); // command queues are associated with a device and context, so for multi-gpu applications you would need @@ -45,14 +45,27 @@ public: // kernels on one or more devices specified in the context. // - Contexts cannot be created using more than one platform! + bool init(); + bool compile_kernel(std::string kernel_path, std::string kernel_name); - bool init(sf::Vector4f *range); + // Create an image buffer from an SF texture. Access Type is the read/write specifier required by OpenCL + bool create_image_buffer(std::string buffer_name, sf::Texture* texture, cl_int access_type); - void run_kernel(std::string kernel_name); + // Have CL create and manage the texture for the image buffer. 
Access Type is the read/write specifier required by OpenCL + bool create_image_buffer(std::string buffer_name, sf::Vector2i size, cl_int access_type); - void draw(sf::RenderWindow *window); + // Create a buffer with CL_MEM_READ_ONLY and CL_MEM_COPY_HOST_PTR + int create_buffer(std::string buffer_name, cl_uint size, void* data); + // Create a buffer with user defined data access flags + int create_buffer(std::string buffer_name, cl_uint size, void* data, cl_mem_flags flags); + + int set_kernel_arg(std::string kernel_name, int index, std::string buffer_name); + + void run_kernel(std::string kernel_name, sf::Vector2i work_size); + + void draw(sf::RenderWindow *window); class device { @@ -72,7 +85,8 @@ public: #pragma pack(pop) device(cl_device_id device_id, cl_platform_id platform_id); - void print(std::ostream& stream); + device(const device& d); + void print(std::ostream& stream) const; void print_packed_data(std::ostream& stream); cl_device_id getDeviceId() const { return device_id; }; @@ -92,21 +106,9 @@ public: private: - bool load_config(); - void save_config(); - - std::vector device_list; - - - std::vector>> platforms_and_devices; - int error = 0; - // Sprite and texture that is shared between CL and GL - sf::Sprite viewport_sprite; - sf::Texture viewport_texture; - sf::Vector2i viewport_resolution; // The device which we have selected according to certain criteria cl_platform_id platform_id; @@ -119,42 +121,28 @@ private: // Maps which contain a mapping from "name" to the host side CL memory object std::unordered_map kernel_map; std::unordered_map buffer_map; + std::unordered_map> image_map; + std::vector device_list; - // Query the hardware on this machine and select the best device and the platform on which it resides - void aquire_hardware(); + // Query the hardware on this machine and store the devices + bool aquire_hardware(); // After aquiring hardware, create a shared context using platform specific CL commands - void create_shared_context(); + bool 
create_shared_context(); // Command queues must be created with a valid context - void create_command_queue(); - - // Compile the kernel and store it in the kernel map with the name as the key - bool compile_kernel(std::string kernel_path, std::string kernel_name); - - // Buffer operations - // All of these functions create and store a buffer in a map with the key representing their name - - // Create an image buffer from an SF texture. Access Type is the read/write specifier required by OpenCL - int create_image_buffer(std::string buffer_name, cl_uint size, sf::Texture* texture, cl_int access_type); - - // Create a buffer with CL_MEM_READ_ONLY and CL_MEM_COPY_HOST_PTR - int create_buffer(std::string buffer_name, cl_uint size, void* data); - - - // Create a buffer with user defined data access flags - int create_buffer(std::string buffer_name, cl_uint size, void* data, cl_mem_flags flags); + bool create_command_queue(); // Store a cl_mem object in the buffer map - int store_buffer(cl_mem buffer, std::string buffer_name); + bool store_buffer(cl_mem buffer, std::string buffer_name); // Using CL release the memory object and remove the KVP associated with the buffer name - int release_buffer(std::string buffer_name); + bool release_buffer(std::string buffer_name); - void assign_kernel_args(); - int set_kernel_arg(std::string kernel_name, int index, std::string buffer_name); + bool load_config(); + void save_config(); static bool vr_assert(int error_code, std::string function_name); - + }; diff --git a/src/OpenCL.cpp b/src/OpenCL.cpp index 412aadf..72e8d58 100644 --- a/src/OpenCL.cpp +++ b/src/OpenCL.cpp @@ -1,10 +1,15 @@ #include #include "util.hpp" +OpenCL::OpenCL() { +} + +OpenCL::~OpenCL() { +} -void OpenCL::run_kernel(std::string kernel_name) { +void OpenCL::run_kernel(std::string kernel_name, sf::Vector2i work_size) { - size_t global_work_size[2] = { static_cast(viewport_resolution.x), static_cast(viewport_resolution.y) }; + size_t global_work_size[2] = { 
static_cast(work_size.x), static_cast(work_size.y) }; cl_kernel kernel = kernel_map.at(kernel_name); @@ -32,49 +37,60 @@ void OpenCL::run_kernel(std::string kernel_name) { void OpenCL::draw(sf::RenderWindow *window) { - window->draw(viewport_sprite); + for (auto i: image_map) { + window->draw(i.second.first); + } } -void OpenCL::aquire_hardware() { +bool OpenCL::aquire_hardware() +{ // Get the number of platforms - cl_uint plt_cnt = 0; - clGetPlatformIDs(0, nullptr, &plt_cnt); + cl_uint platform_count = 0; + clGetPlatformIDs(0, nullptr, &platform_count); + + if (platform_count == 0) { + std::cout << "There appears to be no OpenCL platforms on this machine" << std::endl; + return false; + } // Get the ID's for those platforms - std::vector plt_buf(plt_cnt); - clGetPlatformIDs(plt_cnt, plt_buf.data(), nullptr); + std::vector plt_buf(platform_count); - // Populate the storage vector with the platform id's - for (auto id : plt_buf) { - platforms_and_devices.push_back(std::make_pair(id, std::vector())); - } + clGetPlatformIDs(platform_count, plt_buf.data(), nullptr); + if (vr_assert(error, "clGetPlatformIDs")) + return false; - int device_position = 0; - for (unsigned int i = 0; i < plt_cnt; i++) { + // Cycle through the platform ID's + for (unsigned int i = 0; i < platform_count; i++) { + // And get their device count cl_uint deviceIdCount = 0; error = clGetDeviceIDs(plt_buf[i], CL_DEVICE_TYPE_ALL, 0, nullptr, &deviceIdCount); + if (vr_assert(error, "clGetDeviceIDs")) + return false; - // Check to see if we even have OpenCL on this machine if (deviceIdCount == 0) { - std::cout << "There appears to be no devices, or none at least supporting OpenCL" << std::endl; - return; - } - - // Get the device ids - std::vector deviceIds(deviceIdCount); - error = clGetDeviceIDs(plt_buf[i], CL_DEVICE_TYPE_ALL, deviceIdCount, deviceIds.data(), NULL); - - for (int d = 0; d < deviceIds.size(); d++) { + std::cout << "There appears to be no devices associated with this platform" << 
std::endl; + + } else { - device_list.emplace_back(device(deviceIds[d], plt_buf.at(i))); + // Get the device ids and place them in the device list + std::vector deviceIds(deviceIdCount); + error = clGetDeviceIDs(plt_buf[i], CL_DEVICE_TYPE_ALL, deviceIdCount, deviceIds.data(), NULL); + if (vr_assert(error, "clGetDeviceIDs")) + return false; + + for (int d = 0; d < deviceIds.size(); d++) { + device_list.emplace_back(device(deviceIds[d], plt_buf.at(i))); + } } } + } -void OpenCL::create_shared_context() { +bool OpenCL::create_shared_context() { // Hurray for standards! // Setup the context properties to grab the current GL context @@ -110,6 +126,11 @@ void OpenCL::create_shared_context() { 0 }; +#elif + + std::cout << "Target machine not supported for cl_khr_gl_sharing" << std::endl; + return false; + #endif // Create our shared context @@ -122,26 +143,29 @@ void OpenCL::create_shared_context() { ); if (vr_assert(error, "clCreateContext")) - return; + return false; + + return true; } -void OpenCL::create_command_queue() { +bool OpenCL::create_command_queue() { - // If context and device_id have initialized + // Command queue requires a context and device id. It can also be a device ID list + // as long as the devices reside on the same platform if (context && device_id) { command_queue = clCreateCommandQueue(context, device_id, 0, &error); - if (vr_assert(error, "clCreateCommandQueue")) - return; - - return; - } - else { - std::cout << "Failed creating the command queue. Context or device_id not initialized"; - return; + return false; + + } else { + + std::cout << "Failed creating the command queue. 
Context or device_id not initialized" << std::endl; + return false; } + + return true; } bool OpenCL::compile_kernel(std::string kernel_path, std::string kernel_name) { @@ -200,23 +224,56 @@ bool OpenCL::compile_kernel(std::string kernel_path, std::string kernel_name) { return true; } -int OpenCL::create_image_buffer(std::string buffer_name, cl_uint size, sf::Texture* texture, cl_int access_type) { +bool OpenCL::create_image_buffer(std::string buffer_name, sf::Texture* texture, cl_int access_type) { if (buffer_map.count(buffer_name) > 0) { release_buffer(buffer_name); + + // Need to check to see if we are taking care of the texture as well + if (image_map.count(buffer_name) > 0) + image_map.erase(buffer_name); } - int error; cl_mem buff = clCreateFromGLTexture( context, access_type, GL_TEXTURE_2D, 0, texture->getNativeHandle(), &error); if (vr_assert(error, "clCreateFromGLTexture")) - return 1; + return false; store_buffer(buff, buffer_name); - return 1; + return true; +} + + +bool OpenCL::create_image_buffer(std::string buffer_name, sf::Vector2i size, cl_int access_type) { + + if (buffer_map.count(buffer_name) > 0) { + release_buffer(buffer_name); + + // Need to check to see if we are taking care of the texture as well + if (image_map.count(buffer_name) > 0) + image_map.erase(buffer_name); + } + + sf::Texture texture; + texture.create(size.x, size.y); + + sf::Sprite sprite(texture); + + image_map[buffer_name] = std::make_pair(sprite, texture); + + cl_mem buff = clCreateFromGLTexture( + context, access_type, GL_TEXTURE_2D, + 0, texture.getNativeHandle(), &error); + + if (vr_assert(error, "clCreateFromGLTexture")) + return false; + + store_buffer(buff, buffer_name); + + return true; } int OpenCL::create_buffer(std::string buffer_name, cl_uint size, void* data) { @@ -258,45 +315,47 @@ int OpenCL::create_buffer(std::string buffer_name, cl_uint size, void* data, cl_ } -int OpenCL::store_buffer(cl_mem buffer, std::string buffer_name) { +bool OpenCL::store_buffer(cl_mem 
buffer, std::string buffer_name) { - if (buffer_map.count(buffer_name)) { - clReleaseMemObject(buffer_map[buffer_name]); + if (buffer_map.count(buffer_name) > 0) { + + error = clReleaseMemObject(buffer_map.at(buffer_name)); + + if (vr_assert(error, "clReleaseMemObject")) { + std::cout << "Error releasing overlapping buffer : " << buffer_name; + std::cout << "Buffer not added"; + return false; + } } buffer_map[buffer_name] = buffer; - return 1; + return true; } -int OpenCL::release_buffer(std::string buffer_name) { +bool OpenCL::release_buffer(std::string buffer_name) { if (buffer_map.count(buffer_name) > 0) { - int error = clReleaseMemObject(buffer_map.at(buffer_name)); + error = clReleaseMemObject(buffer_map.at(buffer_name)); if (vr_assert(error, "clReleaseMemObject")) { std::cout << "Error releasing buffer : " << buffer_name; std::cout << "Buffer not removed"; - return -1; + return false; } - else { - buffer_map.erase(buffer_name); - } + + buffer_map.erase(buffer_name); + + } else { - } - else { std::cout << "Error releasing buffer : " << buffer_name; std::cout << "Buffer not found"; - return -1; + return false; } - return 1; -} - -void OpenCL::assign_kernel_args() { - + return true; } int OpenCL::set_kernel_arg(std::string kernel_name, int index, std::string buffer_name) { @@ -315,19 +374,6 @@ int OpenCL::set_kernel_arg(std::string kernel_name, int index, std::string buffe return 1; } -OpenCL::OpenCL(sf::Vector2i resolution) : viewport_resolution(resolution){ - - viewport_texture.create(viewport_resolution.x, viewport_resolution.y); - viewport_sprite.setTexture(viewport_texture); - - -} - -OpenCL::~OpenCL() { - -} - - bool OpenCL::load_config() { std::ifstream input_file("device_config.bin", std::ios::binary | std::ios::in); @@ -368,11 +414,10 @@ void OpenCL::save_config() { output_file.close(); } -bool OpenCL::init(sf::Vector4f *range) -{ +bool OpenCL::init() { - // Initialize opencl up to the point where we start assigning buffers - aquire_hardware(); + if 
(!aquire_hardware()) + return false; if (!load_config()) { @@ -399,25 +444,14 @@ bool OpenCL::init(sf::Vector4f *range) platform_id = device_list.at(selection).getPlatformId(); save_config(); - } - create_shared_context(); - - create_command_queue(); - - while (!compile_kernel("../kernels/mandlebrot.cl", "mandlebrot")) { - std::cin.get(); - } + if (!create_shared_context()) + return false; - create_image_buffer("viewport_image", viewport_texture.getSize().x * viewport_texture.getSize().x * 4 * sizeof(float), &viewport_texture, CL_MEM_WRITE_ONLY); - create_buffer("image_res", sizeof(sf::Vector2i), &viewport_resolution); - create_buffer("range", sizeof(sf::Vector4f), range, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR); - + if (!create_command_queue()) + return false; - set_kernel_arg("mandlebrot", 0, "image_res"); - set_kernel_arg("mandlebrot", 1, "viewport_image"); - set_kernel_arg("mandlebrot", 2, "range"); return true; } @@ -648,7 +682,20 @@ OpenCL::device::device(cl_device_id device_id, cl_platform_id platform_id) { } -void OpenCL::device::print(std::ostream& stream) { +OpenCL::device::device(const device& d) { + + // member values, copy individually + device_id = d.device_id; + platform_id = d.platform_id; + is_little_endian = d.is_little_endian; + cl_gl_sharing = d.cl_gl_sharing; + + // struct so it copies by value + data = d.data; + +} + +void OpenCL::device::print(std::ostream& stream) const { stream << "\n\tDevice ID : " << device_id << std::endl; stream << "\tDevice Name : " << data.device_name << std::endl; diff --git a/src/main.cpp b/src/main.cpp index 10bb0dc..ee1c373 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -24,64 +24,39 @@ float elap_time() { const int WINDOW_X = 1920; const int WINDOW_Y = 1080; -float scale(float valueIn, float origMin, float origMax, float scaledMin, float scaledMax) { - return ((scaledMax - scaledMin) * (valueIn - origMin) / (origMax - origMin)) + scaledMin; -} - -void func(int id, int count, sf::Uint8* pixels) { - - for (int 
pixel_x = 0; pixel_x < WINDOW_X; pixel_x++) { - - for (int pixel_y = (WINDOW_Y * ((float)id / count)); pixel_y < (WINDOW_Y * ((float)(id + 1) / count)); pixel_y++) { - - float y0 = scale(pixel_y, 0, WINDOW_Y, -1.0f, 1.0f); - float x0 = scale(pixel_x, 0, WINDOW_X, -2.0f, 1.0f); - - float x = 0.0; - float y = 0.0; - - int iteration_count = 0; - int interation_threshold = 1000; - - while (x*x + y*y < 4 && iteration_count < interation_threshold) { - float x_temp = x*x - y*y + x0; - y = 2 * x * y + y0; - x = x_temp; - iteration_count++; - } - - sf::Color c(0, 0, scale(iteration_count, 0, 1000, 0, 255), 255); - int val = scale(iteration_count, 0, 1000, 0, 16777216); - - pixels[(pixel_y * WINDOW_X + pixel_x) * 4 + 0] = val & 0xff; - pixels[(pixel_y * WINDOW_X + pixel_x) * 4 + 1] = (val >> 8) & 0xff; - pixels[(pixel_y * WINDOW_X + pixel_x) * 4 + 2] = (val >> 16) & 0xff; - pixels[(pixel_y * WINDOW_X + pixel_x) * 4 + 3] = 200; - } - } -} - enum Mouse_State {PRESSED, DEPRESSED}; int main() { - - std::mt19937 rng(time(NULL)); - std::uniform_int_distribution rgen(100, 400); sf::RenderWindow window(sf::VideoMode(WINDOW_X, WINDOW_Y), "quick-sfml-template"); window.setFramerateLimit(60); float physic_step = 0.166f; float physic_time = 0.0f; - double frame_time = 0.0, elapsed_time = 0.0, delta_time = 0.0, accumulator_time = 0.0, current_time = 0.0; - fps_counter fps; - OpenCL cl(sf::Vector2i(WINDOW_X, WINDOW_Y)); + OpenCL cl; sf::Vector4f range(-1.0f, 1.0f, -1.0f, 1.0f); - cl.init(&range); + sf::Vector2i window_dimensions(WINDOW_X, WINDOW_Y); + + cl.init(); + + while (!cl.compile_kernel("../kernels/mandlebrot.cl", "mandlebrot")) { + std::cin.get(); + } + + sf::Texture t; + t.create(WINDOW_X, WINDOW_Y); + sf::Sprite window_sprite(t); + cl.create_image_buffer("viewport_image", &t, CL_MEM_WRITE_ONLY); + cl.create_buffer("image_res", sizeof(sf::Vector2i), &window_dimensions); + cl.create_buffer("range", sizeof(sf::Vector4f), (void*)&range, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR); + + 
cl.set_kernel_arg("mandlebrot", 0, "image_res"); + cl.set_kernel_arg("mandlebrot", 1, "viewport_image"); + cl.set_kernel_arg("mandlebrot", 2, "range"); while (window.isOpen()) { @@ -130,23 +105,15 @@ int main() { accumulator_time += delta_time; while (accumulator_time >= physic_step) { // While the frame has sim time, update - accumulator_time -= physic_step; physic_time += physic_step; - - // Do physics at 60fps } - cl.run_kernel("mandlebrot"); - window.clear(sf::Color::White); - + + cl.run_kernel("mandlebrot", window_dimensions); cl.draw(&window); - - //window.draw(viewport_sprite); - - fps.draw(&window); - fps.frame(delta_time); + window.draw(window_sprite); window.display();