From c1e18ce17b71ac204734adb98fcfd75883492925 Mon Sep 17 00:00:00 2001 From: mitchellhansen Date: Sun, 15 Apr 2018 22:32:22 -0700 Subject: [PATCH] Fixed small error in scale when doing the downward traversal step. For some reason OpenCL has decided to start completely skipping the downward traversal loop when the jump power is equal to 1/2 dimension. What the hell? --- include/Application.h | 10 +++-- kernels/ray_caster_kernel.cl | 77 ++++++++++++++++++++---------------- src/CLCaster.cpp | 2 +- 3 files changed, 50 insertions(+), 39 deletions(-) diff --git a/include/Application.h b/include/Application.h index 9533b4b..ddff3ed 100644 --- a/include/Application.h +++ b/include/Application.h @@ -42,10 +42,12 @@ class Application { public: - static const int WINDOW_X = 1366; - static const int WINDOW_Y = 768; -// static const int WINDOW_X = 500; -// static const int WINDOW_Y = 500; +// static const int WINDOW_X = 1366; +// static const int WINDOW_Y = 768; +// static const int WINDOW_X = 400; +// static const int WINDOW_Y = 400; + static const int WINDOW_X = 5; + static const int WINDOW_Y = 5; static const int MAP_X; static const int MAP_Y; static const int MAP_Z; diff --git a/kernels/ray_caster_kernel.cl b/kernels/ray_caster_kernel.cl index 6b0b974..eac088e 100644 --- a/kernels/ray_caster_kernel.cl +++ b/kernels/ray_caster_kernel.cl @@ -112,16 +112,16 @@ int rand(int* seed) // 1 <= *seed < m // ========================= OCTREE TRAVERSAL ============================== struct TraversalState { - +int3 sub_oct_pos; // 0 being the root node int parent_stack_position; // Holds child descriptors and their indices in the oct array - ulong parent_stack[10]; - ulong parent_stack_index[10]; + ulong parent_stack[8]; + ulong parent_stack_index[8]; // 0 being the root node uchar scale; - uchar idx_stack[10]; + uchar idx_stack[8]; // current child descriptor for this node ulong current_descriptor; @@ -129,7 +129,7 @@ struct TraversalState { // The position of the (0,0)th vox in an oct int3 oct_pos; - int3 sub_oct_pos; + // The width in voxels of the current valid masks being tested int resolution; @@ -353,18 +353,10 @@ __kernel void raycaster( convert_float3((traversal_state.sub_oct_pos - voxel.xyz) * traversal_state.resolution/2); // Andrew Woo's raycasting algo + __attribute__((opencl_unroll_hint(1))); while (distance_traveled < max_distance && bounce_count < 2) { - - // Test for out of bounds contions, add fog - if (any(voxel >= *map_dim) || any(voxel < 0)){ - voxel.xyz -= voxel_step.xyz * jump_power * face_mask.xyz; - color_accumulator = mix(fog_color, (1.0f,0.3f,0.3f,1.0f), 1.0f) - max(distance_traveled / 10.0f, 0.0f); - color_accumulator.w = 1.0f; - break; - } - - if (setting(OCTENABLED) == 0 && voxel.x < (*map_dim).x && voxel.y < (*map_dim).x && voxel.z < (*map_dim).x) { + if (setting(OCTENABLED) == 0) { // True will result in a -1, e.g (0, 0, -1) so negate it to positive face_mask = -1 * (intersection_t.xyz <= min(intersection_t.yzx, intersection_t.zxy)); @@ -414,6 +406,7 @@ __kernel void raycaster( is_valid = (traversal_state.parent_stack[traversal_state.parent_stack_position] >> 16) & mask_8[mask_index]; failsafe = 0; + __attribute__((opencl_unroll_hint(1))); while (mask_index < prev_val || !is_valid) { // Clear and pop the idx stack @@ -433,7 +426,7 @@ __kernel void raycaster( // Keep track of the 0th edge of our current oct, while keeping // track of the sub_oct we're coming from - traversal_state.sub_oct_pos = traversal_state.oct_pos; + //traversal_state.sub_oct_pos = traversal_state.oct_pos; // select take the dumb MSB truth value for vector types // so we just gotta do this component wise, dumb @@ -462,10 +455,15 @@ __kernel void raycaster( // While we haven't bottomed out and the oct we're looking at is valid failsafe = 0; - while (jump_power > 1 && is_valid) { + if (jump_power == 8 && is_valid) + failsafe = 5; + if (jump_power > 1 && is_valid) + failsafe = 1; + + __attribute__((opencl_unroll_hint(1))); + for (;jump_power > 1 && is_valid;) { // If all went well and we found a valid non-leaf oct then we will traverse further down the hierarchy - traversal_state.scale++; // Count the number of valid octs that come before and add it to the index to get the position // Negate it by one as it counts itself @@ -506,51 +504,62 @@ __kernel void raycaster( traversal_state.oct_pos.x += (jump_power); // Set the idx to represent the move - traversal_state.idx_stack[traversal_state.scale+1] |= idx_set_x_mask; + traversal_state.idx_stack[traversal_state.scale] |= idx_set_x_mask; } if (voxel.y >= (jump_power) + traversal_state.oct_pos.y) { traversal_state.oct_pos.y += (jump_power); - traversal_state.idx_stack[traversal_state.scale+1] |= idx_set_y_mask; + traversal_state.idx_stack[traversal_state.scale] |= idx_set_y_mask; } if (voxel.z >= (jump_power) + traversal_state.oct_pos.z) { traversal_state.oct_pos.z += (jump_power); - traversal_state.idx_stack[traversal_state.scale+1] |= idx_set_z_mask; + traversal_state.idx_stack[traversal_state.scale] |= idx_set_z_mask; } -jump_power /= 2; + jump_power /= 2; + // Update the mask index with the new voxel we walked down to, and then check it's valid status mask_index = traversal_state.idx_stack[traversal_state.scale]; is_valid = (traversal_state.parent_stack[traversal_state.parent_stack_position] >> 16) & mask_8[mask_index]; + traversal_state.scale++; + failsafe++; if (failsafe > 10) break; } +traversal_state.sub_oct_pos = traversal_state.oct_pos; + while (true){ + if (voxel.x >= (jump_power) + traversal_state.oct_pos.x) { + traversal_state.sub_oct_pos.x += (jump_power); + traversal_state.idx_stack[traversal_state.scale] |= idx_set_x_mask; + } + if (voxel.y >= (jump_power) + traversal_state.oct_pos.y) { + traversal_state.sub_oct_pos.y += (jump_power); + traversal_state.idx_stack[traversal_state.scale] |= idx_set_y_mask; + } + if (voxel.z >= (jump_power) + traversal_state.oct_pos.z) { + traversal_state.sub_oct_pos.z += (jump_power); + traversal_state.idx_stack[traversal_state.scale] |= idx_set_z_mask; + } - traversal_state.sub_oct_pos = traversal_state.oct_pos; - if (voxel.x >= (jump_power) + traversal_state.oct_pos.x) { - traversal_state.sub_oct_pos.x += (jump_power); - } - if (voxel.y >= (jump_power) + traversal_state.oct_pos.y) { - traversal_state.sub_oct_pos.y += (jump_power); - } - if (voxel.z >= (jump_power) + traversal_state.oct_pos.z) { - traversal_state.sub_oct_pos.z += (jump_power); + break; } + traversal_state = traversal_state; // Add the delta for the jump power and the traversed face intersection_t += delta_t * jump_power * fabs(convert_float3(face_mask.xyz)); // Get the other faces - int3 other_faces = select((int3)(1,1,1), (int3)(0,0,0), (int3)(face_mask == 1)); + //int3 other_faces = select((int3)(1,1,1), (int3)(0,0,0), (int3)(face_mask == 1)); // Get the amount of times we need to multiply the delta t to get to our face - uint3 multiplier = convert_uint3(abs(traversal_state.oct_pos - last_oct_pos) * (1.0f/prev_jump_power)); + //uint3 multiplier = convert_uint3(abs(traversal_state.oct_pos - last_oct_pos) * (1.0f/prev_jump_power)); + + //last_oct_pos = traversal_state.oct_pos; - last_oct_pos = traversal_state.oct_pos; // Go back to the beginning intersection t's for the non traversed faces //intersection_t -= delta_t * prev_jump_power * convert_float3(other_faces.xyz); diff --git a/src/CLCaster.cpp b/src/CLCaster.cpp index 80a8b86..1a096d6 100644 --- a/src/CLCaster.cpp +++ b/src/CLCaster.cpp @@ -768,7 +768,7 @@ bool CLCaster::compile_kernel(std::string kernel_source, bool is_path, std::stri build_string_stream << " -D" << define.first << "=" << define.second; } - build_string_stream << " -cl-finite-math-only -cl-fast-relaxed-math -cl-unsafe-math-optimizations"; + build_string_stream << " -cl-finite-math-only -cl-fast-relaxed-math -cl-unsafe-math-optimizations -cl-opt-disable"; std::string build_string = build_string_stream.str();