From 10bc771807345e62cbbc3c8bd0667c31d28c31b5 Mon Sep 17 00:00:00 2001 From: mitchellhansen Date: Mon, 5 Sep 2016 22:01:07 -0700 Subject: [PATCH] MBP was having problems with out of bounds memory operations with the way the cam dir was handled. sf::vector3f -> float3 and while accessing the Zth element. I'm assuming it was because of some weird backend stuff regarding that gentypeOdds are actually gentypeOdds + 1. Converted write_imagef's to write_imageui's though I don't think that really helps anything. Fixed the bottom half of the screen getting cut off. View matrix import error. Fixed problem the MBP had with negative values during device init, that was a weird one. --- kernels/minimal_kernel.cl | 51 +++++++++++++++++++++++++++------------ src/CL_Wrapper.cpp | 9 ++++--- src/TestPlatform.cpp | 7 +++--- src/main.cpp | 32 ++++++++++++------------ 4 files changed, 59 insertions(+), 40 deletions(-) diff --git a/kernels/minimal_kernel.cl b/kernels/minimal_kernel.cl index 565df95..ecc5669 100644 --- a/kernels/minimal_kernel.cl +++ b/kernels/minimal_kernel.cl @@ -1,15 +1,15 @@ -float4 white_light(float4 input, float3 light, int3 mask) { +uint4 white_light(uint4 input, float3 light, int3 mask) { input.w = input.w + acos( dot( normalize(light), normalize(fabs(convert_float3(mask))) ) - ) / 2; + ) * 50; - return input; + return (input); } @@ -25,8 +25,11 @@ __kernel void min_kern( __write_only image2d_t image ){ + size_t id = get_global_id(0); int2 pixel = {id % resolution->x, id / resolution->x}; + //int2 pixel = {1, 1}; + float3 ray_dir = projection_matrix[pixel.x + resolution->x * pixel.y]; ray_dir = (float3)( @@ -34,11 +37,27 @@ __kernel void min_kern( ray_dir.y, ray_dir.z * cos(cam_dir->y) - ray_dir.x * sin(cam_dir->y) ); +// +// float a = cam_dir->x; +// float b = cam_dir->y; +// float c = cam_dir->z; +// +// ray_dir.x = ray_dir.z * sin(b) + ray_dir.x * cos(b); +// ray_dir.y = ray_dir.y; +// ray_dir.z = ray_dir.z * cos(b) - ray_dir.x * sin(b); +// +// +// float3 
ray_dir2 = (float3)( +// ray_dir.x * cos(c) - ray_dir.y * sin(c), +// ray_dir.x * sin(c) + ray_dir.y * cos(c), +// ray_dir.z); +// +// printf("%f, %f, %f", ray_dir2.x, ray_dir2.y, ray_dir2.z); ray_dir = (float3)( - ray_dir.x * cos(cam_dir->z) - ray_dir.y * sin(cam_dir->z), - ray_dir.x * sin(cam_dir->z) + ray_dir.y * cos(cam_dir->z), - ray_dir.z + ray_dir.x * cos(cam_dir->z) - ray_dir.y * sin(cam_dir->z), + ray_dir.x * sin(cam_dir->z) + ray_dir.y * cos(cam_dir->z), + ray_dir.z ); // Setup the voxel step based on what direction the ray is pointing @@ -83,11 +102,11 @@ __kernel void min_kern( int3 undershoot = voxel > 0; if (overshoot.x == 0 || overshoot.y == 0 || overshoot.z == 0 || undershoot.x == 0 || undershoot.y == 0){ - write_imagef(image, pixel, (float4)(.73, .81, .89, 1.0)); + write_imageui(image, pixel, (uint4)(50, 50, 50, 255)); return; } if (undershoot.z == 0) { - write_imagef(image, pixel, (float4)(.14, .30, .50, 1.0)); + write_imageui(image, pixel, (uint4)(14, 30, 50, 255)); return; } @@ -98,23 +117,23 @@ __kernel void min_kern( if (voxel_data != 0) { switch (voxel_data) { case 1: - write_imagef(image, pixel, (float4)(.50, .00, .00, 1)); + write_imageui(image, pixel, (uint4)(50, 0, 0, 255)); return; case 2: - write_imagef(image, pixel, (float4)(.00, .50, .40, 1.00)); + write_imageui(image, pixel, (uint4)(0, 50, 40, 255)); return; case 3: - write_imagef(image, pixel, (float4)(.00, .00, .50, 1.00)); + write_imageui(image, pixel, (uint4)(0, 0, 50, 255)); return; case 4: - write_imagef(image, pixel, (float4)(.25, .00, .25, 1.00)); + write_imageui(image, pixel, (uint4)(25, 0, 25, 255)); return; case 5: - //write_imagef(image, pixel, (float4)(.25, .00, .25, 1.00)); - write_imagef(image, pixel, white_light((float4)(.25, .32, .14, 0.2), (float3)(lights[7], lights[8], lights[9]), mask)); + //write_imageui(image, pixel, (uint4)(200, 200, 200, 255)); + write_imageui(image, pixel, white_light((uint4)(225, 232, 214, 100), (float3)(lights[7], lights[8], lights[9]), 
mask)); return; case 6: - write_imagef(image, pixel, (float4)(.30, .80, .10, 1.00)); + write_imageui(image, pixel, (uint4)(30, 80, 10, 255)); return; } } @@ -122,6 +141,6 @@ __kernel void min_kern( dist++; } while (dist < max_dist); - write_imagef(image, pixel, (float4)(.73, .81, .89, 1.0)); + write_imageui(image, pixel, (uint4)(73, 81, 89, 255)); return; } \ No newline at end of file diff --git a/src/CL_Wrapper.cpp b/src/CL_Wrapper.cpp index d531f02..bbff6cf 100644 --- a/src/CL_Wrapper.cpp +++ b/src/CL_Wrapper.cpp @@ -59,8 +59,9 @@ int CL_Wrapper::acquire_platform_and_device(){ // falling back to the cpu with the fastest clock if we weren't able to find one device current_best_device; - current_best_device.type = -1; // Set this to -1 so the first run always selects a new device - + current_best_device.type = 0; // Set this to -1 so the first run always selects a new device + current_best_device.clock_frequency = 0; + current_best_device.comp_units = 0; for (auto kvp: plt_ids){ @@ -86,7 +87,7 @@ int CL_Wrapper::acquire_platform_and_device(){ platform_id = current_best_device.platform; device_id = current_best_device.id; - return 0; + return 1; }; int CL_Wrapper::create_shared_context() { @@ -240,7 +241,7 @@ int CL_Wrapper::store_buffer(cl_mem buffer, std::string buffer_name){ int CL_Wrapper::run_kernel(std::string kernel_name, const int work_size){ - const int WORKER_SIZE = 10; + const int WORKER_SIZE = 1; size_t global_work_size[1] = { static_cast(work_size) }; cl_kernel kernel = kernel_map.at(kernel_name); diff --git a/src/TestPlatform.cpp b/src/TestPlatform.cpp index e4d046c..264c39b 100644 --- a/src/TestPlatform.cpp +++ b/src/TestPlatform.cpp @@ -3,6 +3,8 @@ #include #include #include +#include + #ifdef linux @@ -13,8 +15,8 @@ #include #elif defined TARGET_OS_MAC -# include -# include +#include + #endif @@ -52,7 +54,6 @@ int IsExtensionSupported( inline int test_for_gl_cl_sharing() { - int err = 0; #if defined (__APPLE__) || defined(MACOSX) diff --git 
a/src/main.cpp b/src/main.cpp index 8e89114..becd999 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -25,15 +25,15 @@ #include #endif -#include "TestPlatform.cpp" #include "Map.h" #include "Curses.h" #include "util.hpp" #include "RayCaster.h" #include "CL_Wrapper.h" -const int WINDOW_X = 1000; -const int WINDOW_Y = 1000; +const int WINDOW_X = 200; +const int WINDOW_Y = 200; +const int WORK_SIZE = WINDOW_X * WINDOW_Y; const int MAP_X = 1024; const int MAP_Y = 1024; @@ -69,7 +69,6 @@ int main() { sf::Texture t; CL_Wrapper c; - query_platform_devices(); c.acquire_platform_and_device(); c.create_shared_context(); c.create_command_queue(); @@ -140,19 +139,18 @@ int main() { } } std::cout << "done\n"; - int ind = 367; - printf("%i === %f, %f, %f\n", ind, view_matrix[ind * 4 + 0], view_matrix[ind * 4 + 1], view_matrix[ind * 4 + 2]); cl_mem view_matrix_buff = clCreateBuffer( c.getContext(), CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, - sizeof(float) * 3 * view_res.x * view_res.y, view_matrix, NULL + sizeof(float) * 4 * view_res.x * view_res.y, view_matrix, NULL ); - sf::Vector3f cam_dir(1.0f, 0.0f, 1.00f); + //sf::Vector3f cam_dir(1.0f, 0.0f, 1.00f); + float cam_dir[] = {1.0f, 0.0f, 1.57f, 0.0f}; cl_mem cam_dir_buff = clCreateBuffer( c.getContext(), CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, - sizeof(float) * 4, &cam_dir, NULL + sizeof(float) * 4, cam_dir, NULL ); @@ -221,9 +219,10 @@ int main() { c.set_kernel_arg("min_kern", 7, "light_count_buffer"); c.set_kernel_arg("min_kern", 8, "image_buffer"); - const int size = WINDOW_X * WINDOW_Y; - s.setTexture(t); + + s.setTexture(t, true); + s.setPosition(0, 0); // The step size in milliseconds between calls to Update() // Lets set it to 16.6 milliseonds (60FPS) @@ -300,7 +299,7 @@ int main() { cam_vec.x = -1; } if (sf::Keyboard::isKeyPressed(sf::Keyboard::Left)) { - cam_dir.z = -0.1f; + //cam_dir.z = -0.1f; } if (sf::Keyboard::isKeyPressed(sf::Keyboard::Right)) { cam_vec.z = +0.1f; @@ -318,15 +317,15 @@ int main() { // Mouse movement 
sf::Mouse::setPosition(fixed); - cam_dir.y -= deltas.y / 300.0f; - cam_dir.z -= deltas.x / 300.0f; + cam_dir[1] -= deltas.y / 300.0f; + cam_dir[2] -= deltas.x / 300.0f; } } cam_pos.x += cam_vec.x / 1.0; cam_pos.y += cam_vec.y / 1.0; cam_pos.z += cam_vec.z / 1.0; - std::cout << cam_vec.x << " : " << cam_vec.y << " : " << cam_vec.z << std::endl; + //std::cout << cam_vec.x << " : " << cam_vec.y << " : " << cam_vec.z << std::endl; // Time keeping @@ -364,7 +363,7 @@ int main() { error = clEnqueueAcquireGLObjects(c.getCommandQueue(), 1, &image_buff, 0, 0, 0); if (c.assert(error, "clEnqueueAcquireGLObjects")) return -1; - c.run_kernel("min_kern", size); + c.run_kernel("min_kern", WORK_SIZE); clFinish(c.getCommandQueue()); @@ -372,7 +371,6 @@ int main() { if (c.assert(error, "clEnqueueReleaseGLObjects")) return -1; - s.setPosition(0, 0); window.draw(s); fps.frame(delta_time);