diff --git a/kernels/minimal_kernel.cl b/kernels/minimal_kernel.cl index 30f0740..565df95 100644 --- a/kernels/minimal_kernel.cl +++ b/kernels/minimal_kernel.cl @@ -1,15 +1,15 @@ -uint4 white_light(uint4 input, float3 light, int3 mask) { +float4 white_light(float4 input, float3 light, int3 mask) { input.w = input.w + acos( dot( normalize(light), normalize(fabs(convert_float3(mask))) ) - ) * 50; + ) / 2; - return (input); + return input; } @@ -25,34 +25,30 @@ __kernel void min_kern( __write_only image2d_t image ){ - // Get the pixel position of this worker size_t id = get_global_id(0); int2 pixel = {id % resolution->x, id / resolution->x}; - - - // Slew the ray into it's correct position based on the view matrix's starting position - // and the camera's current direction - float3 ray_dir = projection_matrix[pixel.x + resolution->x * pixel.y]; - // Yaw ray_dir = (float3)( ray_dir.z * sin(cam_dir->y) + ray_dir.x * cos(cam_dir->y), ray_dir.y, ray_dir.z * cos(cam_dir->y) - ray_dir.x * sin(cam_dir->y) ); - // Pitch ray_dir = (float3)( - ray_dir.x * cos(cam_dir->z) - ray_dir.y * sin(cam_dir->z), - ray_dir.x * sin(cam_dir->z) + ray_dir.y * cos(cam_dir->z), - ray_dir.z + ray_dir.x * cos(cam_dir->z) - ray_dir.y * sin(cam_dir->z), + ray_dir.x * sin(cam_dir->z) + ray_dir.y * cos(cam_dir->z), + ray_dir.z ); // Setup the voxel step based on what direction the ray is pointing int3 voxel_step = {1, 1, 1}; voxel_step *= (ray_dir > 0) - (ray_dir < 0); + /*voxel_step.x *= (ray_dir.x > 0) - (ray_dir.x < 0); + voxel_step.y *= (ray_dir.y > 0) - (ray_dir.y < 0); + voxel_step.z *= (ray_dir.z > 0) - (ray_dir.z < 0);*/ + // Setup the voxel coords from the camera origin int3 voxel = convert_int3(*cam_pos); @@ -64,26 +60,21 @@ __kernel void min_kern( // for all 3 axis XYZ. float3 intersection_t = delta_t; - // Create a psuedo random number for view fog int2 randoms = { 3, 14 }; uint seed = randoms.x + id; uint t = seed ^ (seed << 11); uint result = randoms.y ^ (randoms.y >> 19) ^ (t ^ (t >> 8)); - // Distance a ray can travel before it terminates - int max_dist = 200 + result % 50; + int max_dist = 500 + result % 50; int dist = 0; - // Bitmask to keep track of which axis was tripped int3 mask = { 0, 0, 0 }; // Andrew Woo's raycasting algo do { - // Non-branching test of the lowest delta_t value mask = intersection_t.xyz <= min(intersection_t.yzx, intersection_t.zxy); - - // Based on the result increment the voxel and intersection + float3 thing = delta_t * fabs(convert_float3(mask.xyz)); intersection_t += delta_t * fabs(convert_float3(mask.xyz)); voxel.xyz += voxel_step.xyz * mask.xyz; @@ -91,15 +82,12 @@ __kernel void min_kern( int3 overshoot = voxel <= *map_dim; int3 undershoot = voxel > 0; - // "Sky" if (overshoot.x == 0 || overshoot.y == 0 || overshoot.z == 0 || undershoot.x == 0 || undershoot.y == 0){ - write_imageui(image, pixel, (uint4)(135, 206, 235, 255)); + write_imagef(image, pixel, (float4)(.73, .81, .89, 1.0)); return; } - - // "Water" if (undershoot.z == 0) { - write_imageui(image, pixel, (uint4)(64, 164, 223, 255)); + write_imagef(image, pixel, (float4)(.14, .30, .50, 1.0)); return; } @@ -110,23 +98,23 @@ __kernel void min_kern( if (voxel_data != 0) { switch (voxel_data) { case 1: - write_imageui(image, pixel, (uint4)(50, 0, 0, 255)); + write_imagef(image, pixel, (float4)(.50, .00, .00, 1)); return; case 2: - write_imageui(image, pixel, (uint4)(0, 50, 40, 255)); + write_imagef(image, pixel, (float4)(.00, .50, .40, 1.00)); return; case 3: - write_imageui(image, pixel, (uint4)(0, 0, 50, 255)); + write_imagef(image, pixel, (float4)(.00, .00, .50, 1.00)); return; case 4: - write_imageui(image, pixel, (uint4)(25, 0, 25, 255)); + write_imagef(image, pixel, (float4)(.25, .00, .25, 1.00)); return; case 5: - //write_imageui(image, pixel, (uint4)(200, 200, 200, 255)); - write_imageui(image, pixel, white_light((uint4)(44, 176, 55, 100), (float3)(lights[7], lights[8], lights[9]), mask)); + //write_imagef(image, pixel, (float4)(.25, .00, .25, 1.00)); + write_imagef(image, pixel, white_light((float4)(.25, .32, .14, 0.2), (float3)(lights[7], lights[8], lights[9]), mask)); return; case 6: - write_imageui(image, pixel, (uint4)(30, 80, 10, 255)); + write_imagef(image, pixel, (float4)(.30, .80, .10, 1.00)); return; } } @@ -134,6 +122,6 @@ __kernel void min_kern( dist++; } while (dist < max_dist); - write_imageui(image, pixel, (uint4)(135, 206, 235, 255)); + write_imagef(image, pixel, (float4)(.73, .81, .89, 1.0)); return; } \ No newline at end of file diff --git a/src/CL_Wrapper.cpp b/src/CL_Wrapper.cpp index bbff6cf..d531f02 100644 --- a/src/CL_Wrapper.cpp +++ b/src/CL_Wrapper.cpp @@ -59,9 +59,8 @@ int CL_Wrapper::acquire_platform_and_device(){ // falling back to the cpu with the fastest clock if we weren't able to find one device current_best_device; - current_best_device.type = 0; // Set this to -1 so the first run always selects a new device - current_best_device.clock_frequency = 0; - current_best_device.comp_units = 0; + current_best_device.type = -1; // Set this to -1 so the first run always selects a new device + for (auto kvp: plt_ids){ @@ -87,7 +86,7 @@ int CL_Wrapper::acquire_platform_and_device(){ platform_id = current_best_device.platform; device_id = current_best_device.id; - return 1; + return 0; }; int CL_Wrapper::create_shared_context() { @@ -241,7 +240,7 @@ int CL_Wrapper::store_buffer(cl_mem buffer, std::string buffer_name){ int CL_Wrapper::run_kernel(std::string kernel_name, const int work_size){ - const int WORKER_SIZE = 1; + const int WORKER_SIZE = 10; size_t global_work_size[1] = { static_cast(work_size) }; cl_kernel kernel = kernel_map.at(kernel_name); diff --git a/src/TestPlatform.cpp b/src/TestPlatform.cpp index 264c39b..e4d046c 100644 --- a/src/TestPlatform.cpp +++ b/src/TestPlatform.cpp @@ -3,8 +3,6 @@ #include #include #include -#include - #ifdef linux @@ -15,8 +13,8 @@ #include #elif defined TARGET_OS_MAC -#include - +# include +# include #endif @@ -54,6 +52,7 @@ int IsExtensionSupported( inline int test_for_gl_cl_sharing() { + int err = 0; #if defined (__APPLE__) || defined(MACOSX) diff --git a/src/main.cpp b/src/main.cpp index becd999..8e89114 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -25,15 +25,15 @@ #include #endif +#include "TestPlatform.cpp" #include "Map.h" #include "Curses.h" #include "util.hpp" #include "RayCaster.h" #include "CL_Wrapper.h" -const int WINDOW_X = 200; -const int WINDOW_Y = 200; -const int WORK_SIZE = WINDOW_X * WINDOW_Y; +const int WINDOW_X = 1000; +const int WINDOW_Y = 1000; const int MAP_X = 1024; const int MAP_Y = 1024; @@ -69,6 +69,7 @@ int main() { sf::Texture t; CL_Wrapper c; + query_platform_devices(); c.acquire_platform_and_device(); c.create_shared_context(); c.create_command_queue(); @@ -139,18 +140,19 @@ int main() { } } std::cout << "done\n"; + int ind = 367; + printf("%i === %f, %f, %f\n", ind, view_matrix[ind * 4 + 0], view_matrix[ind * 4 + 1], view_matrix[ind * 4 + 2]); cl_mem view_matrix_buff = clCreateBuffer( c.getContext(), CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, - sizeof(float) * 4 * view_res.x * view_res.y, view_matrix, NULL + sizeof(float) * 3 * view_res.x * view_res.y, view_matrix, NULL ); - //sf::Vector3f cam_dir(1.0f, 0.0f, 1.00f); + sf::Vector3f cam_dir(1.0f, 0.0f, 1.00f); - float cam_dir[] = {1.0f, 0.0f, 1.57f, 0.0f}; cl_mem cam_dir_buff = clCreateBuffer( c.getContext(), CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, - sizeof(float) * 4, cam_dir, NULL + sizeof(float) * 4, &cam_dir, NULL ); @@ -219,10 +221,9 @@ int main() { c.set_kernel_arg("min_kern", 7, "light_count_buffer"); c.set_kernel_arg("min_kern", 8, "image_buffer"); + const int size = WINDOW_X * WINDOW_Y; - - s.setTexture(t, true); - s.setPosition(0, 0); + s.setTexture(t); // The step size in milliseconds between calls to Update() // Lets set it to 16.6 milliseonds (60FPS) @@ -299,7 +300,7 @@ int main() { cam_vec.x = -1; } if (sf::Keyboard::isKeyPressed(sf::Keyboard::Left)) { - //cam_dir.z = -0.1f; + cam_dir.z = -0.1f; } if (sf::Keyboard::isKeyPressed(sf::Keyboard::Right)) { cam_vec.z = +0.1f; @@ -317,15 +318,15 @@ int main() { // Mouse movement sf::Mouse::setPosition(fixed); - cam_dir[1] -= deltas.y / 300.0f; - cam_dir[2] -= deltas.x / 300.0f; + cam_dir.y -= deltas.y / 300.0f; + cam_dir.z -= deltas.x / 300.0f; } } cam_pos.x += cam_vec.x / 1.0; cam_pos.y += cam_vec.y / 1.0; cam_pos.z += cam_vec.z / 1.0; - //std::cout << cam_vec.x << " : " << cam_vec.y << " : " << cam_vec.z << std::endl; + std::cout << cam_vec.x << " : " << cam_vec.y << " : " << cam_vec.z << std::endl; // Time keeping @@ -363,7 +364,7 @@ int main() { error = clEnqueueAcquireGLObjects(c.getCommandQueue(), 1, &image_buff, 0, 0, 0); if (c.assert(error, "clEnqueueAcquireGLObjects")) return -1; - c.run_kernel("min_kern", WORK_SIZE); + c.run_kernel("min_kern", size); clFinish(c.getCommandQueue()); @@ -371,6 +372,7 @@ int main() { if (c.assert(error, "clEnqueueReleaseGLObjects")) return -1; + s.setPosition(0, 0); window.draw(s); fps.frame(delta_time);