From 76189ef0b4d7acbc36be2271a70274eb61876f87 Mon Sep 17 00:00:00 2001 From: MitchellHansen Date: Sat, 14 Oct 2017 14:18:26 -0700 Subject: [PATCH] Optimizing, fixing things in the kernel. More oct work --- include/map/Octree.h | 10 +---- kernels/ray_caster_kernel.cl | 73 +++++++++++++++++------------------- src/map/Map.cpp | 64 ++++++++++++++++++------------- src/map/Octree.cpp | 30 ++++++++------- 4 files changed, 90 insertions(+), 87 deletions(-) diff --git a/include/map/Octree.h b/include/map/Octree.h index bd2df4c..4002e80 100644 --- a/include/map/Octree.h +++ b/include/map/Octree.h @@ -77,16 +77,10 @@ public: static const uint8_t idx_set_z_mask = 0x4; // Mask for checking if valid or leaf - const uint8_t mask_8[8] = { - 0x1, 0x2, 0x4, 0x8, - 0x10, 0x20, 0x40, 0x80 - }; + static const uint8_t mask_8[8]; // Mask for counting the previous valid bits - const uint8_t count_mask_8[8] = { - 0x1, 0x3, 0x7, 0xF, - 0x1F, 0x3F, 0x7F, 0xFF - }; + static const uint8_t count_mask_8[8]; // uint64_t manipulation masks diff --git a/kernels/ray_caster_kernel.cl b/kernels/ray_caster_kernel.cl index 8ba1e2d..f906c10 100644 --- a/kernels/ray_caster_kernel.cl +++ b/kernels/ray_caster_kernel.cl @@ -16,6 +16,7 @@ __constant int2 zeroed_int2 = {0, 0}; __constant const uchar idx_set_x_mask = 0x1; __constant const uchar idx_set_y_mask = 0x2; __constant const uchar idx_set_z_mask = 0x4; +__constant const uchar idx_set_mask = {0x1, 0x2, 0x4}; __constant const uchar mask_8[8] = { 0x1, 0x2, 0x4, 0x8, @@ -113,7 +114,6 @@ bool get_oct_vox( ulong current_index = *settings_buffer; ulong head = octree_descriptor_buffer[current_index]; - uint parent_stack_position = 0; ulong parent_stack[32]; uchar scale = 0; @@ -123,7 +123,7 @@ bool get_oct_vox( bool found = false; - parent_stack[parent_stack_position] = head; + parent_stack[scale] = head; // Set our initial dimension and the position at the corner of the oct to keep track of our position int dimension = OCTDIM; @@ -139,40 +139,37 @@ bool get_oct_vox( // // No? // Break - while (dimension > 1) { + while (dimension > 64) { // So we can be a little bit tricky here and increment our // array index that holds our masks as we build the idx. // Adding 1 for X, 2 for Y, and 4 for Z - int mask_index = 0; + idx_stack[scale] = 0; - // Do the logic steps to find which sub oct we step down into + // Do the logic steps to find which sub oct we step down into if (position.x >= (dimension / 2) + quad_position.x) { // Set our voxel position to the (0,0) of the correct oct quad_position.x += (dimension / 2); - // increment the mask index and mentioned above - mask_index += 1; - // Set the idx to represent the move idx_stack[scale] |= idx_set_x_mask; } if (position.y >= (dimension / 2) + quad_position.y) { - quad_position.y |= (dimension / 2); - mask_index += 2; + quad_position.y += (dimension / 2); idx_stack[scale] |= idx_set_y_mask; } if (position.z >= (dimension / 2) + quad_position.z) { quad_position.z += (dimension / 2); - mask_index += 4; idx_stack[scale] |= idx_set_z_mask; } + int mask_index = idx_stack[scale]; + // Check to see if we are on a valid oct if ((head >> 16) & mask_8[mask_index]) { @@ -205,9 +202,8 @@ bool get_oct_vox( } head = octree_descriptor_buffer[current_index]; - // Increment the parent stack position and put the new oct node as the parent - parent_stack_position++; - parent_stack[parent_stack_position] = head; + + parent_stack[scale] = head; } else { @@ -285,7 +281,13 @@ __kernel void raycaster( // for all 3 axis XYZ. We take the full positive cardinality when // subtracting the floor, so we must transfer the sign over from // the voxel step - float3 intersection_t = delta_t * ((*cam_pos) - ceil(*cam_pos)) * convert_float3(voxel_step); + + // handle the case where we're smack on 0 for the camera position + float modifier = 0.0f; + if (any(((*cam_pos) - ceil(*cam_pos) == 0.0f))) + modifier = 0.000001f; + + float3 intersection_t = delta_t * ((*cam_pos) - ceil(*cam_pos) + modifier) * convert_float3(voxel_step); // When we transfer the sign over, we get the correct direction of // the offset, but we merely transposed over the value instead of mirroring @@ -309,7 +311,7 @@ __kernel void raycaster( bool shadow_ray = false; // Andrew Woo's raycasting algo - while (distance_traveled < max_distance && bounce_count < 4) { + while (distance_traveled < max_distance && bounce_count < 2) { // Fancy no branch version of the logic step face_mask = intersection_t.xyz <= min(intersection_t.yzx, intersection_t.zxy); @@ -326,26 +328,26 @@ __kernel void raycaster( constant int vox_dim = OCTDIM; - // If we hit a voxel - if (voxel.x < vox_dim && voxel.y < vox_dim && voxel.z < vox_dim){ - if (get_oct_vox( - voxel, - octree_descriptor_buffer, - octree_attachment_lookup_buffer, - octree_attachment_buffer, - settings_buffer - )){ - voxel_data = 5; - } else { - voxel_data = 0; - } - } else { + // // If we hit a voxel + // if (voxel.x < vox_dim && voxel.y < vox_dim && voxel.z < vox_dim){ + // if (get_oct_vox( + // voxel, + // octree_descriptor_buffer, + // octree_attachment_lookup_buffer, + // octree_attachment_buffer, + // settings_buffer + // )){ + // voxel_data = 5; + // } else { + // voxel_data = 0; + // } + // } else { voxel_data = map[voxel.x + (*map_dim).x * (voxel.y + (*map_dim).z * (voxel.z))]; - } + //} - if (voxel_data != 0) { + if (voxel_data == 5 || voxel_data == 6) { // Determine where on the 2d plane the ray intersected face_position = zeroed_float3; tile_face_position = zeroed_float2; @@ -415,7 +417,7 @@ __kernel void raycaster( // Now we detect what type of of voxel we intersected and decide whether // to bend the ray, send out a light intersection ray, or add texture color - // TEXTURE HIT + SHADOW REDIRECTION + // TEXTURE HIT + SHADOW RAY REDIRECTION if (voxel_data == 5 && !shadow_ray){ shadow_ray = true; @@ -436,7 +438,6 @@ __kernel void raycaster( fog_distance = distance_traveled; max_distance = distance_traveled + DistanceBetweenPoints(convert_float3(voxel), (float3)(lights[4], lights[5], lights[6])); - float3 hit_pos = convert_float3(voxel) + face_position; ray_dir = normalize((float3)(lights[4], lights[5], lights[6]) - hit_pos); if (any(ray_dir == zeroed_float3)) @@ -459,8 +460,6 @@ __kernel void raycaster( ).xyz/4; voxel_color.w -= 0.0f; - //max_distance += 200; - float3 hit_pos = convert_float3(voxel) + face_position; ray_dir *= sign; @@ -471,8 +470,6 @@ __kernel void raycaster( voxel_step = ( 1, 1, 1 ); voxel_step *= (ray_dir > 0) - (ray_dir < 0); - //voxel = convert_int3(hit_pos); - delta_t = fabs(1.0f / ray_dir); intersection_t = delta_t * ((hit_pos)-floor(hit_pos)) * convert_float3(voxel_step); intersection_t += delta_t * -convert_float3(isless(intersection_t, 0)); diff --git a/src/map/Map.cpp b/src/map/Map.cpp index 3ab0b32..39e1b43 100644 --- a/src/map/Map.cpp +++ b/src/map/Map.cpp @@ -52,17 +52,19 @@ Map::Map(uint32_t dimensions, Old_Map* array_map) { bool Map::test_oct_arr_traversal(sf::Vector3i dimensions) { - sf::Vector2f cam_dir(0.95, 0.81); - sf::Vector3f cam_pos(10.5, 10.5, 10.5); - std::vector> list1 = CastRayCharArray(voxel_data, &dimensions, &cam_dir, &cam_pos); - std::vector> list2 = CastRayOctree(&octree, &dimensions, &cam_dir, &cam_pos); - - if (list1 != list2) { - return false; - } else { - return true; - } + //sf::Vector2f cam_dir(0.95, 0.81); + //sf::Vector3f cam_pos(10.5, 10.5, 10.5); + //std::vector> list1 = CastRayCharArray(voxel_data, &dimensions, &cam_dir, &cam_pos); + //std::vector> list2 = CastRayOctree(&octree, &dimensions, &cam_dir, &cam_pos); + + //if (list1 != list2) { + // return false; + //} else { + // return true; + //} + + return false; } void Map::setVoxel(sf::Vector3i pos, int val) { @@ -117,12 +119,6 @@ std::vector> Map::CastRayCharArray( voxel_step.z *= (ray_dir.z > 0) - (ray_dir.z < 0); - // ================================================================================================= - // ================================================================================================= - // ================================================================================================= - // ================================================================================================= - - // Delta T is the units a ray must travel along an axis in order to // traverse an integer split sf::Vector3f delta_t( @@ -196,7 +192,7 @@ std::vector> Map::CastRayOctree( ) { // Setup the voxel coords from the camera origin - sf::Vector3i voxel(*cam_pos); + sf::Vector3i voxel(0,0,0); // THIS DOES NOT HAVE TO RETURN TRUE ON FOUND // This function when passed an "air" voxel will return as far down @@ -238,9 +234,8 @@ std::vector> Map::CastRayOctree( voxel_step.z *= (ray_dir.z > 0) - (ray_dir.z < 0); // set the jump multiplier based on the traversal state vs the log base 2 of the maps dimensions - int jump_power = 1; - if (log2(map_dim->x) != traversal_state.scale) - jump_power = pow(2, traversal_state.scale); + int jump_power = log2(map_dim->x) - traversal_state.scale; + // Delta T is the units a ray must travel along an axis in order to // traverse an integer split @@ -316,17 +311,21 @@ std::vector> Map::CastRayOctree( } traversal_state.idx_stack[traversal_state.scale] ^= this_face_mask; + int mask_index = traversal_state.idx_stack[traversal_state.scale]; // Check to see if the idx increased or decreased // If it decreased - // Pop up the stack until the oct that the ray is within is valid. - while (traversal_state.idx_stack[traversal_state.scale] < prev_val) { + // Pop up the stack until the oct that the idx flip is valid and we landed on a valid oct + while (traversal_state.idx_stack[traversal_state.scale] < prev_val || + !((traversal_state.parent_stack[traversal_state.parent_stack_position] >> 16) & Octree::mask_8[mask_index]) + ) { jump_power *= 2; - traversal_state.oct_pos.x; - traversal_state.oct_pos.y; - traversal_state.oct_pos.z; + // Keep track of the 0th edge of out current oct + traversal_state.oct_pos.x = floor(voxel.x / 2) * jump_power; + traversal_state.oct_pos.y = floor(voxel.x / 2) * jump_power; + traversal_state.oct_pos.z = floor(voxel.x / 2) * jump_power; // Clear and pop the idx stack traversal_state.idx_stack[traversal_state.scale] = 0; @@ -345,10 +344,21 @@ std::vector> Map::CastRayOctree( // Apply the face mask to the new idx for the while check traversal_state.idx_stack[traversal_state.scale] ^= this_face_mask; - + mask_index = traversal_state.idx_stack[traversal_state.scale]; } - + + // Check to see if we are on a valid oct + //if ((traversal_state.parent_stack[traversal_state.parent_stack_position] >> 16) & Octree::mask_8[mask_index]) { + + // // Check to see if it is a leaf + // if ((traversal_state.parent_stack[traversal_state.parent_stack_position] >> 24) & Octree::mask_8[mask_index]) { + + // // If it is, then we cannot traverse further as CP's won't have been generated + // state.found = 1; + // return state; + // } + //} // Check to see if we are on top of a valid branch // Traverse down to the lowest valid oct that the ray is within diff --git a/src/map/Octree.cpp b/src/map/Octree.cpp index 5490945..c67b7f1 100644 --- a/src/map/Octree.cpp +++ b/src/map/Octree.cpp @@ -69,30 +69,20 @@ OctState Octree::GetVoxel(sf::Vector3i position) { // Break while (dimension > 1) { - // So we can be a little bit tricky here and increment our - // array index that holds our masks as we build the idx. - // Adding 1 for X, 2 for Y, and 4 for Z - int mask_index = 0; - - // Do the logic steps to find which sub oct we step down into if (position.x >= (dimension / 2) + state.oct_pos.x) { // Set our voxel position to the (0,0) of the correct oct state.oct_pos.x += (dimension / 2); - // increment the mask index and mentioned above - mask_index += 1; - // Set the idx to represent the move state.idx_stack[state.scale] |= idx_set_x_mask; } if (position.y >= (dimension / 2) + state.oct_pos.y) { - state.oct_pos.y |= (dimension / 2); - - mask_index += 2; + // TODO What the hell is going on with the or operator on this one!??!?!?! + state.oct_pos.y += (dimension / 2); // TODO What is up with the XOR operator that was on this one? state.idx_stack[state.scale] |= idx_set_y_mask; @@ -102,12 +92,14 @@ OctState Octree::GetVoxel(sf::Vector3i position) { state.oct_pos.z += (dimension / 2); - mask_index += 4; - state.idx_stack[state.scale] |= idx_set_z_mask; } + // Our count mask matches the way we index our idx so we can just + // copy it over + int mask_index = state.idx_stack[state.scale]; + // Check to see if we are on a valid oct if ((head >> 16) & mask_8[mask_index]) { @@ -367,3 +359,13 @@ bool Octree::Validate(char* data, sf::Vector3i dimensions){ unsigned int Octree::getDimensions() { return oct_dimensions; } + +const uint8_t Octree::mask_8[8] = { + 0x1, 0x2, 0x4, 0x8, + 0x10, 0x20, 0x40, 0x80 +}; + +const uint8_t Octree::count_mask_8[8] = { + 0x1, 0x3, 0x7, 0xF, + 0x1F, 0x3F, 0x7F, 0xFF +};