From f487895f9f4eeb11fd76b017d311c47d3f8915d1 Mon Sep 17 00:00:00 2001 From: MitchellHansen Date: Sat, 3 Sep 2016 19:23:50 -0700 Subject: [PATCH] that was a pain. Got it working on windows again. MSVC was being really picky about a few errors. Good thing though, I'm not really sure why clang + osx let me be that lax with memory --- include/CL_Wrapper.h | 1 + kernels/{kernel.c => kernel.cl} | 248 +++++++++--------- .../{minimal_kernel.c => minimal_kernel.cl} | 12 +- src/CL_Wrapper.cpp | 24 +- src/TestPlatform.cpp | 20 +- src/main.cpp | 10 +- 6 files changed, 173 insertions(+), 142 deletions(-) rename kernels/{kernel.c => kernel.cl} (97%) rename kernels/{minimal_kernel.c => minimal_kernel.cl} (96%) diff --git a/include/CL_Wrapper.h b/include/CL_Wrapper.h index 7069b7a..693a5e0 100644 --- a/include/CL_Wrapper.h +++ b/include/CL_Wrapper.h @@ -25,6 +25,7 @@ struct device { cl_uint clock_frequency; char version[128]; cl_platform_id platform; + cl_uint comp_units; }; class CL_Wrapper { diff --git a/kernels/kernel.c b/kernels/kernel.cl similarity index 97% rename from kernels/kernel.c rename to kernels/kernel.cl index dd68506..6acf0f8 100644 --- a/kernels/kernel.c +++ b/kernels/kernel.cl @@ -1,125 +1,125 @@ -// global : local : constant : private - -// Function arguments of type image2d_t, image3d_t, image2d_array_t, image1d_t, image1d_buffer_t, -// and image1d_array_t refer to image memory objects allocated in the **global** address space. - -// http://downloads.ti.com/mctools/esd/docs/opencl/memory/buffers.html - -// Open CL C -// https://www.fixstars.com/en/opencl/book/OpenCLProgrammingBook/opencl-c/ - -__kernel void hello( - global int2* resolution, - global char* map, - global float3* projection_matrix, - global float3* cam_dir, - global float3* cam_pos, - global image2d_t* canvas) { - - printf("%s\n", "this is a test string\n"); - - - - const int MAX_RAY_STEPS = 64; - - // The pixel coord we are at - int2 screenPos = (int2)(get_global_id(0) % resolution->x, get_global_id(0) / resolution->x); - - // The X and Y planes - //float3 cameraPlaneU = vec3(1.0, 0.0, 0.0) - - // Y being multiplied by the aspect ratio, usually around .5-6ish; - //cl_float3 cameraPlaneV = vec3(0.0, 1.0, 0.0) * iResolution.y / iResolution.x; - - // So this is how they do that ray aiming! hah this is so tiny - // (camera direction) + (pixel.x * the X plane) + (product of pixel.y * Y plane) - // Oh all it's doing is adding the x and y coords of the pixel to the camera direction vector, interesting - - //cl_float3 rayDir = cameraDir + screenPos.x * cameraPlaneU + screenPos.y * cameraPlaneV; - - // the origin of the ray - // So the sign thing is for the up and down motion - - //cl_float3 rayPos = vec3(0.0, 2.0 * sin(iGlobalTime * 2.7), -12.0); - - // Ah, and here is where it spins around the center axis - // So it looks like its applying a function to rotate the x and z axis - //rayPos.xz = rotate2d(rayPos.xz, iGlobalTime); - //rayDir.xz = rotate2d(rayDir.xz, iGlobalTime); - - // Just an intvec of out coords - //ivec3 mapPos = ivec3(floor(rayPos)); - - // I think this is the delta t value - // the magnitude of the vector divided by the rays direction. Not sure what the aim of that is - // The ray direction might always be normalized, so that would be the dame as my delta_T - //vec3 deltaDist = abs(vec3(length(rayDir)) / rayDir); - - // The steps are the signs of the ray direction - //ivec3 rayStep = ivec3(sign(rayDir)); - - // ithe sign of the rays direction - // * - // Convert map position to a floating point vector and take away the ray position - // + - // the sign of the rays direction by 0.5 - // + - // 0.5 - // Now multyply everything by 0.5 - //vec3 sideDist = (sign(rayDir) * (vec3(mapPos) - rayPos) + (sign(rayDir) * 0.5) + 0.5) * deltaDist; - - // A byte mask - //bvec3 mask; - - // repeat until the max steps - //for (int i = 0; i < MAX_RAY_STEPS; i++) { - - // If there is a voxel at the map position, continue? - //if (getVoxel(mapPos)) - // break; - - // - // find which is smaller - // y ? z --> x` - // z ? x --> y` - // x ? y --> z` - // - // find which os is less or equal - // x` ? x --> x - // y` ? y --> y - // z` ? z --> z - - // Now find which ons is - //mask = lessThanEqual(sideDist.xyz, min(sideDist.yzx, sideDist.zxy)); - - - // Originally he used a component wise - /*bvec3 b1 = lessThan(sideDist.xyz, sideDist.yzx); - bvec3 b2 = lessThanEqual(sideDist.xyz, sideDist.zxy); - mask.x = b1.x && b2.x; - mask.y = b1.y && b2.y; - mask.z = b1.z && b2.z;*/ - //Would've done mask = b1 && b2 but the compiler is making me do it component wise. - - //All components of mask are false except for the corresponding largest component - //of sideDist, which is the axis along which the ray should be incremented. - - //sideDist += vec3(mask) * deltaDist; - //mapPos += ivec3(mask) * rayStep; - //} - - // Ah this is for coloring obviously, seems to be odd though, no indexing - //vec4 color; - //if (mask.x) { - // color = vec4(0.5); - //} - //if (mask.y) { - // color = vec4(1.0); - //} - //if (mask.z) { - // color = vec4(0.75); - //} - //write_imagef(image, pixel, color); - - +// global : local : constant : private + +// Function arguments of type image2d_t, image3d_t, image2d_array_t, image1d_t, image1d_buffer_t, +// and image1d_array_t refer to image memory objects allocated in the **global** address space. + +// http://downloads.ti.com/mctools/esd/docs/opencl/memory/buffers.html + +// Open CL C +// https://www.fixstars.com/en/opencl/book/OpenCLProgrammingBook/opencl-c/ + +__kernel void hello( + global int2* resolution, + global char* map, + global float3* projection_matrix, + global float3* cam_dir, + global float3* cam_pos, + global image2d_t* canvas) { + + printf("%s\n", "this is a test string\n"); + + + + const int MAX_RAY_STEPS = 64; + + // The pixel coord we are at + int2 screenPos = (int2)(get_global_id(0) % resolution->x, get_global_id(0) / resolution->x); + + // The X and Y planes + //float3 cameraPlaneU = vec3(1.0, 0.0, 0.0) + + // Y being multiplied by the aspect ratio, usually around .5-6ish; + //cl_float3 cameraPlaneV = vec3(0.0, 1.0, 0.0) * iResolution.y / iResolution.x; + + // So this is how they do that ray aiming! hah this is so tiny + // (camera direction) + (pixel.x * the X plane) + (product of pixel.y * Y plane) + // Oh all it's doing is adding the x and y coords of the pixel to the camera direction vector, interesting + + //cl_float3 rayDir = cameraDir + screenPos.x * cameraPlaneU + screenPos.y * cameraPlaneV; + + // the origin of the ray + // So the sign thing is for the up and down motion + + //cl_float3 rayPos = vec3(0.0, 2.0 * sin(iGlobalTime * 2.7), -12.0); + + // Ah, and here is where it spins around the center axis + // So it looks like its applying a function to rotate the x and z axis + //rayPos.xz = rotate2d(rayPos.xz, iGlobalTime); + //rayDir.xz = rotate2d(rayDir.xz, iGlobalTime); + + // Just an intvec of out coords + //ivec3 mapPos = ivec3(floor(rayPos)); + + // I think this is the delta t value + // the magnitude of the vector divided by the rays direction. Not sure what the aim of that is + // The ray direction might always be normalized, so that would be the dame as my delta_T + //vec3 deltaDist = abs(vec3(length(rayDir)) / rayDir); + + // The steps are the signs of the ray direction + //ivec3 rayStep = ivec3(sign(rayDir)); + + // ithe sign of the rays direction + // * + // Convert map position to a floating point vector and take away the ray position + // + + // the sign of the rays direction by 0.5 + // + + // 0.5 + // Now multyply everything by 0.5 + //vec3 sideDist = (sign(rayDir) * (vec3(mapPos) - rayPos) + (sign(rayDir) * 0.5) + 0.5) * deltaDist; + + // A byte mask + //bvec3 mask; + + // repeat until the max steps + //for (int i = 0; i < MAX_RAY_STEPS; i++) { + + // If there is a voxel at the map position, continue? + //if (getVoxel(mapPos)) + // break; + + // + // find which is smaller + // y ? z --> x` + // z ? x --> y` + // x ? y --> z` + // + // find which os is less or equal + // x` ? x --> x + // y` ? y --> y + // z` ? z --> z + + // Now find which ons is + //mask = lessThanEqual(sideDist.xyz, min(sideDist.yzx, sideDist.zxy)); + + + // Originally he used a component wise + /*bvec3 b1 = lessThan(sideDist.xyz, sideDist.yzx); + bvec3 b2 = lessThanEqual(sideDist.xyz, sideDist.zxy); + mask.x = b1.x && b2.x; + mask.y = b1.y && b2.y; + mask.z = b1.z && b2.z;*/ + //Would've done mask = b1 && b2 but the compiler is making me do it component wise. + + //All components of mask are false except for the corresponding largest component + //of sideDist, which is the axis along which the ray should be incremented. + + //sideDist += vec3(mask) * deltaDist; + //mapPos += ivec3(mask) * rayStep; + //} + + // Ah this is for coloring obviously, seems to be odd though, no indexing + //vec4 color; + //if (mask.x) { + // color = vec4(0.5); + //} + //if (mask.y) { + // color = vec4(1.0); + //} + //if (mask.z) { + // color = vec4(0.75); + //} + //write_imagef(image, pixel, color); + + } \ No newline at end of file diff --git a/kernels/minimal_kernel.c b/kernels/minimal_kernel.cl similarity index 96% rename from kernels/minimal_kernel.c rename to kernels/minimal_kernel.cl index 7dee964..e097773 100644 --- a/kernels/minimal_kernel.c +++ b/kernels/minimal_kernel.cl @@ -48,17 +48,17 @@ __kernel void min_kern( // Setup the voxel coords from the camera origin int3 voxel = { - floorf(cam_pos->x), - floorf(cam_pos->y), - floorf(cam_pos->z) + floor(cam_pos->x), + floor(cam_pos->y), + floor(cam_pos->z) }; // Delta T is the units a ray must travel along an axis in order to // traverse an integer split float3 delta_t = { - fabsf(1.0f / ray_dir.x), - fabsf(1.0f / ray_dir.y), - fabsf(1.0f / ray_dir.z) + fabs(1.0f / ray_dir.x), + fabs(1.0f / ray_dir.y), + fabs(1.0f / ray_dir.z) }; // Intersection T is the collection of the next intersection points diff --git a/src/CL_Wrapper.cpp b/src/CL_Wrapper.cpp index 4df21e5..d531f02 100644 --- a/src/CL_Wrapper.cpp +++ b/src/CL_Wrapper.cpp @@ -44,10 +44,11 @@ int CL_Wrapper::acquire_platform_and_device(){ d.id = deviceIds[q]; - clGetDeviceInfo(d.id, CL_DEVICE_PLATFORM, 128, &d.platform, NULL); - clGetDeviceInfo(d.id, CL_DEVICE_VERSION, 128, &d.version, NULL); - clGetDeviceInfo(d.id, CL_DEVICE_TYPE, 128, &d.type, NULL); - clGetDeviceInfo(d.id, CL_DEVICE_MAX_CLOCK_FREQUENCY, 128, &d.clock_frequency, NULL); + clGetDeviceInfo(d.id, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &d.platform, NULL); + clGetDeviceInfo(d.id, CL_DEVICE_VERSION, sizeof(char) * 128, &d.version, NULL); + clGetDeviceInfo(d.id, CL_DEVICE_TYPE, sizeof(cl_device_type), &d.type, NULL); + clGetDeviceInfo(d.id, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(cl_uint), &d.clock_frequency, NULL); + clGetDeviceInfo(d.id, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), &d.comp_units, NULL); plt_ids.at(d.platform).push_back(d); } @@ -58,7 +59,8 @@ int CL_Wrapper::acquire_platform_and_device(){ // falling back to the cpu with the fastest clock if we weren't able to find one device current_best_device; - current_best_device.clock_frequency = 0; // Set this to 0 so the first run always selects a new device + current_best_device.type = -1; // Set this to -1 so the first run always selects a new device + for (auto kvp: plt_ids){ @@ -72,7 +74,10 @@ int CL_Wrapper::acquire_platform_and_device(){ if (device.type == CL_DEVICE_TYPE_GPU && current_best_device.type != CL_DEVICE_TYPE_GPU){ current_best_device = device; } - else if (device.clock_frequency > current_best_device.clock_frequency){ + else if (device.comp_units > current_best_device.comp_units) { + current_best_device = device; + } + else if (current_best_device.type != CL_DEVICE_TYPE_GPU && device.clock_frequency > current_best_device.clock_frequency){ current_best_device = device; } } @@ -108,7 +113,7 @@ int CL_Wrapper::create_shared_context() { //}; HGLRC hGLRC = wglGetCurrentContext(); HDC hDC = wglGetCurrentDC(); - cl_context_properties context_properties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platformIds[1], CL_GL_CONTEXT_KHR, (cl_context_properties)hGLRC, CL_WGL_HDC_KHR, (cl_context_properties)hDC, 0 }; + cl_context_properties context_properties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform_id, CL_GL_CONTEXT_KHR, (cl_context_properties)hGLRC, CL_WGL_HDC_KHR, (cl_context_properties)hDC, 0 }; #elif defined TARGET_OS_MAC @@ -206,6 +211,8 @@ int CL_Wrapper::compile_kernel(std::string kernel_source, bool is_path, std::str return -1; kernel_map.emplace(std::make_pair(kernel_name, kernel)); + + return 1; } int CL_Wrapper::set_kernel_arg( @@ -228,6 +235,7 @@ int CL_Wrapper::set_kernel_arg( int CL_Wrapper::store_buffer(cl_mem buffer, std::string buffer_name){ buffer_map.emplace(std::make_pair(buffer_name, buffer)); + return 1; } int CL_Wrapper::run_kernel(std::string kernel_name, const int work_size){ @@ -246,7 +254,7 @@ int CL_Wrapper::run_kernel(std::string kernel_name, const int work_size){ if (assert(error, "clEnqueueNDRangeKernel")) return -1; - + return 1; } diff --git a/src/TestPlatform.cpp b/src/TestPlatform.cpp index e26bb78..e4d046c 100644 --- a/src/TestPlatform.cpp +++ b/src/TestPlatform.cpp @@ -1,13 +1,16 @@ #pragma once #include #include -#include #include #include #ifdef linux #elif defined _WIN32 +#define CL_USE_DEPRECATED_OPENCL_1_2_APIS +#include +#include +#include #elif defined TARGET_OS_MAC # include @@ -111,6 +114,21 @@ inline int query_platform_devices() { clGetDeviceInfo(devices[i], CL_DEVICE_VERSION, 128, buf, NULL); fprintf(stdout, "%s\n", buf); + + + //cl_device_type a; + //clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, 128, &a, NULL); + //std::cout << a << std::endl; + + //cl_uint b; + //clGetDeviceInfo(devices[i], CL_DEVICE_MAX_CLOCK_FREQUENCY, 128, &b, NULL); + //std::cout << b << std::endl; + + //cl_uint c; + //clGetDeviceInfo(devices[i], CL_DEVICE_MAX_COMPUTE_UNITS, 128, &c, NULL); + //std::cout << c << std::endl; + + std::cout << devices[i] << std::endl; } free(devices); diff --git a/src/main.cpp b/src/main.cpp index 85a9ca0..2b1e562 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -9,9 +9,12 @@ #include #elif defined _WIN32 +#include #include #include #include +#include + #include #elif defined TARGET_OS_MAC @@ -66,12 +69,13 @@ int main() { sf::Texture t; CL_Wrapper c; + query_platform_devices(); c.acquire_platform_and_device(); c.create_shared_context(); c.create_command_queue(); - c.compile_kernel("../kernels/kernel.c", true, "hello"); - c.compile_kernel("../kernels/minimal_kernel.c", true, "min_kern"); + //c.compile_kernel("../kernels/kernel.cl", true, "hello"); + c.compile_kernel("../kernels/minimal_kernel.cl", true, "min_kern"); sf::Vector3i map_dim(MAP_X, MAP_Y, MAP_Z); Map* map = new Map(map_dim); @@ -101,7 +105,7 @@ int main() { // SFML 2.4 has Vector4 datatypes....... - float view_matrix[view_res.x * view_res.y * 4]; + float* view_matrix = new float[WINDOW_X * WINDOW_Y * 4]; for (int y = -view_res.y / 2; y < view_res.y / 2; y++) { for (int x = -view_res.x / 2; x < view_res.x / 2; x++) {