Optimizing, fixing things in the kernel. More oct work

8 years ago · 76189ef0b4
parent 8894d5e3a7
commit 76189ef0b4
4 changed files with 90 additions and 87 deletions
--- a/include/map/Octree.h
+++ b/include/map/Octree.h
@ -77,16 +77,10 @@ public:
 	static const uint8_t idx_set_z_mask = 0x4;

 	// Mask for checking if valid or leaf
-	const uint8_t mask_8[8] = {
-		0x1,  0x2,  0x4,  0x8,
-		0x10, 0x20, 0x40, 0x80
-	};
+	static const uint8_t mask_8[8];

 	// Mask for counting the previous valid bits
-	const uint8_t count_mask_8[8] = {
-		0x1,  0x3,  0x7,  0xF,
-		0x1F, 0x3F, 0x7F, 0xFF
-	};
+	static const uint8_t count_mask_8[8];


 	// uint64_t manipulation masks
--- a/kernels/ray_caster_kernel.cl
+++ b/kernels/ray_caster_kernel.cl
@ -16,6 +16,7 @@ __constant int2 zeroed_int2     = {0, 0};
 __constant const uchar idx_set_x_mask = 0x1;
 __constant const uchar idx_set_y_mask = 0x2;
 __constant const uchar idx_set_z_mask = 0x4;
+__constant const uchar idx_set_mask = {0x1, 0x2, 0x4};

 __constant const uchar mask_8[8] = {
 	0x1,  0x2,  0x4,  0x8,
@ -113,7 +114,6 @@ bool get_oct_vox(
 	ulong current_index = *settings_buffer;
 	ulong head = octree_descriptor_buffer[current_index];

-	uint parent_stack_position = 0;
 	ulong parent_stack[32];

 	uchar scale = 0;
@ -123,7 +123,7 @@ bool get_oct_vox(

 	bool found = false;

-	parent_stack[parent_stack_position] = head;
+	parent_stack[scale] = head;

 	// Set our initial dimension and the position at the corner of the oct to keep track of our position
 	int dimension = OCTDIM;
@ -139,40 +139,37 @@ bool get_oct_vox(
 	//
 	//			No?
 	//				Break
-	while (dimension > 1) {
+	while (dimension > 64) {

 		// So we can be a little bit tricky here and increment our
 		// array index that holds our masks as we build the idx.
 		// Adding 1 for X, 2 for Y, and 4 for Z
-		int mask_index = 0;
+		idx_stack[scale] = 0;

-		// Do the logic steps to find which sub oct we step down into
+		//	Do the logic steps to find which sub oct we step down into
 		if (position.x >= (dimension / 2) + quad_position.x) {

 			// Set our voxel position to the (0,0) of the correct oct
 			quad_position.x += (dimension / 2);

-			// increment the mask index and mentioned above
-			mask_index += 1;
-
 			// Set the idx to represent the move
 			idx_stack[scale] |= idx_set_x_mask;

 		}
 		if (position.y >= (dimension / 2) + quad_position.y) {

-			quad_position.y |= (dimension / 2);
-			mask_index += 2;
+			quad_position.y += (dimension / 2);
 			idx_stack[scale] |= idx_set_y_mask;

 		}
 		if (position.z >= (dimension / 2) + quad_position.z) {

 			quad_position.z += (dimension / 2);
-			mask_index += 4;
 			idx_stack[scale] |= idx_set_z_mask;
 		}

+		int mask_index = idx_stack[scale];
+
 		// Check to see if we are on a valid oct
 		if ((head >> 16) & mask_8[mask_index]) {

@ -205,9 +202,8 @@ bool get_oct_vox(
 			}
 			head = octree_descriptor_buffer[current_index];

-			// Increment the parent stack position and put the new oct node as the parent
-			parent_stack_position++;
-			parent_stack[parent_stack_position] = head;
+
+			parent_stack[scale] = head;

 		}
 		else {
@ -285,7 +281,13 @@ __kernel void raycaster(
 	// for all 3 axis XYZ. We take the full positive cardinality when
 	// subtracting the floor, so we must transfer the sign over from
 	// the voxel step
-	float3 intersection_t = delta_t * ((*cam_pos) - ceil(*cam_pos)) * convert_float3(voxel_step);
+
+	// Handle the case where any camera position component lies exactly on an integer boundary
+	float modifier = 0.0f;
+	if (any(((*cam_pos) - ceil(*cam_pos) == 0.0f)))
+		modifier = 0.000001f;
+
+	float3 intersection_t = delta_t * ((*cam_pos) - ceil(*cam_pos) + modifier) * convert_float3(voxel_step);

 	// When we transfer the sign over, we get the correct direction of
 	// the offset, but we merely transposed over the value instead of mirroring
@ -309,7 +311,7 @@ __kernel void raycaster(
 	bool shadow_ray = false;

 	// Andrew Woo's raycasting algo
-    while (distance_traveled < max_distance && bounce_count < 4) {
+    while (distance_traveled < max_distance && bounce_count < 2) {

 		// Fancy no branch version of the logic step
 		face_mask = intersection_t.xyz <= min(intersection_t.yzx, intersection_t.zxy);
@ -326,26 +328,26 @@ __kernel void raycaster(

 		constant int vox_dim = OCTDIM;

-        // If we hit a voxel
-		if (voxel.x < vox_dim && voxel.y < vox_dim && voxel.z < vox_dim){
-		 	if (get_oct_vox(
-		 		voxel,
-		 		octree_descriptor_buffer,
-		 		octree_attachment_lookup_buffer,
-		 		octree_attachment_buffer,
-		 		settings_buffer
-		 		)){
-		 			voxel_data = 5;
-		 		} else {
-		 			voxel_data = 0;
-		 		}
-		} else {
+        // // If we hit a voxel
+		// if (voxel.x < vox_dim && voxel.y < vox_dim && voxel.z < vox_dim){
+		//  	if (get_oct_vox(
+		//  		voxel,
+		//  		octree_descriptor_buffer,
+		//  		octree_attachment_lookup_buffer,
+		//  		octree_attachment_buffer,
+		//  		settings_buffer
+		//  		)){
+		//  			voxel_data = 5;
+		//  		} else {
+		//  			voxel_data = 0;
+		//  		}
+		// } else {
 			voxel_data = map[voxel.x + (*map_dim).x * (voxel.y + (*map_dim).z * (voxel.z))];
-		}
+		//}


-		if (voxel_data != 0) {

+		if (voxel_data == 5 || voxel_data == 6) {
 			// Determine where on the 2d plane the ray intersected
 			face_position = zeroed_float3;
 			tile_face_position = zeroed_float2;
@ -415,7 +417,7 @@ __kernel void raycaster(
 			// Now we detect what type of of voxel we intersected and decide whether
 			// to bend the ray, send out a light intersection ray, or add texture color

-			// TEXTURE HIT + SHADOW REDIRECTION
+			// TEXTURE HIT + SHADOW RAY REDIRECTION
 			if (voxel_data == 5 && !shadow_ray){

 				shadow_ray = true;
@ -436,7 +438,6 @@ __kernel void raycaster(
 				fog_distance = distance_traveled;
 				max_distance = distance_traveled + DistanceBetweenPoints(convert_float3(voxel), (float3)(lights[4], lights[5], lights[6]));

-
 				float3 hit_pos = convert_float3(voxel) + face_position;
 				ray_dir = normalize((float3)(lights[4], lights[5], lights[6]) - hit_pos);
 				if (any(ray_dir == zeroed_float3))
@ -459,8 +460,6 @@ __kernel void raycaster(
 				).xyz/4;

 				voxel_color.w -= 0.0f;
-				//max_distance += 200;
-

 				float3 hit_pos = convert_float3(voxel) + face_position;
 				ray_dir *= sign;
@ -471,8 +470,6 @@ __kernel void raycaster(
 				voxel_step = ( 1, 1, 1 );
 				voxel_step *= (ray_dir > 0) - (ray_dir < 0);

-				//voxel = convert_int3(hit_pos);
-
 				delta_t = fabs(1.0f / ray_dir);
 				intersection_t = delta_t * ((hit_pos)-floor(hit_pos)) * convert_float3(voxel_step);
 				intersection_t += delta_t * -convert_float3(isless(intersection_t, 0));
--- a/src/map/Map.cpp
+++ b/src/map/Map.cpp
@ -52,17 +52,19 @@ Map::Map(uint32_t dimensions, Old_Map* array_map) {

 bool Map::test_oct_arr_traversal(sf::Vector3i dimensions) {

-	sf::Vector2f cam_dir(0.95, 0.81);
-	sf::Vector3f cam_pos(10.5, 10.5, 10.5);
-	std::vector<std::tuple<sf::Vector3i, char>> list1 = CastRayCharArray(voxel_data, &dimensions, &cam_dir, &cam_pos);
-	std::vector<std::tuple<sf::Vector3i, char>> list2 = CastRayOctree(&octree, &dimensions, &cam_dir, &cam_pos);
-
-	if (list1 != list2) {
-		return false;
-	} else {
-		return true;
-	}
+	//sf::Vector2f cam_dir(0.95, 0.81);
+	//sf::Vector3f cam_pos(10.5, 10.5, 10.5);
+	//std::vector<std::tuple<sf::Vector3i, char>> list1 = CastRayCharArray(voxel_data, &dimensions, &cam_dir, &cam_pos);
+	//std::vector<std::tuple<sf::Vector3i, char>> list2 = CastRayOctree(&octree, &dimensions, &cam_dir, &cam_pos);
+
+	//if (list1 != list2) {
+	//	return false;
+	//} else {
+	//	return true;
+	//}
+

+	return false;
 }

 void Map::setVoxel(sf::Vector3i pos, int val) {
@ -117,12 +119,6 @@ std::vector<std::tuple<sf::Vector3i, char>>  Map::CastRayCharArray(
 	voxel_step.z *= (ray_dir.z > 0) - (ray_dir.z < 0);


-	// =================================================================================================
-	// =================================================================================================
-	// =================================================================================================
-	// =================================================================================================
-
-
 	// Delta T is the units a ray must travel along an axis in order to
 	// traverse an integer split
 	sf::Vector3f delta_t(
@ -196,7 +192,7 @@ std::vector<std::tuple<sf::Vector3i, char>> Map::CastRayOctree(
 ) {

 	// Setup the voxel coords from the camera origin
-	sf::Vector3i voxel(*cam_pos);
+	sf::Vector3i voxel(0,0,0);

 	// THIS DOES NOT HAVE TO RETURN TRUE ON FOUND
 	// This function when passed an "air" voxel will return as far down
@ -238,9 +234,8 @@ std::vector<std::tuple<sf::Vector3i, char>> Map::CastRayOctree(
 	voxel_step.z *= (ray_dir.z > 0) - (ray_dir.z < 0);
 	
 	// set the jump multiplier based on the traversal state vs the log base 2 of the maps dimensions
-	int jump_power = 1;
-	if (log2(map_dim->x) != traversal_state.scale)
-		jump_power = pow(2, traversal_state.scale);
+	int jump_power = log2(map_dim->x) - traversal_state.scale;
+

 	// Delta T is the units a ray must travel along an axis in order to
 	// traverse an integer split
@ -316,17 +311,21 @@ std::vector<std::tuple<sf::Vector3i, char>> Map::CastRayOctree(
 		}

 		traversal_state.idx_stack[traversal_state.scale] ^= this_face_mask;
+		int mask_index = traversal_state.idx_stack[traversal_state.scale];

 		// Check to see if the idx increased or decreased	
 		// If it decreased
-		//		Pop up the stack until the oct that the ray is within is valid.
-		while (traversal_state.idx_stack[traversal_state.scale] < prev_val) {
+		//		Pop up the stack until the idx flip is valid and we have landed on a valid oct
+		while (traversal_state.idx_stack[traversal_state.scale] < prev_val ||
+			!((traversal_state.parent_stack[traversal_state.parent_stack_position] >> 16) & Octree::mask_8[mask_index])
+			) {
 			
 			jump_power *= 2;
 			
-			traversal_state.oct_pos.x;
-			traversal_state.oct_pos.y;
-			traversal_state.oct_pos.z;
+			// Keep track of the 0th edge of our current oct
+			traversal_state.oct_pos.x = floor(voxel.x / 2) * jump_power;
+			traversal_state.oct_pos.y = floor(voxel.x / 2) * jump_power;
+			traversal_state.oct_pos.z = floor(voxel.x / 2) * jump_power;

 			// Clear and pop the idx stack
 			traversal_state.idx_stack[traversal_state.scale] = 0;
@ -345,10 +344,21 @@ std::vector<std::tuple<sf::Vector3i, char>> Map::CastRayOctree(
 			
 			// Apply the face mask to the new idx for the while check
 			traversal_state.idx_stack[traversal_state.scale] ^= this_face_mask;
-
+			mask_index = traversal_state.idx_stack[traversal_state.scale];
 		}
 		

+		// Check to see if we are on a valid oct
+		//if ((traversal_state.parent_stack[traversal_state.parent_stack_position] >> 16) & Octree::mask_8[mask_index]) {
+
+		//	// Check to see if it is a leaf
+		//	if ((traversal_state.parent_stack[traversal_state.parent_stack_position] >> 24) & Octree::mask_8[mask_index]) {
+
+		//		// If it is, then we cannot traverse further as CP's won't have been generated
+		//		state.found = 1;
+		//		return state;
+		//	}
+		//}
 		//	Check to see if we are on top of a valid branch
 		//	Traverse down to the lowest valid oct that the ray is within

--- a/src/map/Octree.cpp
+++ b/src/map/Octree.cpp
@ -69,30 +69,20 @@ OctState Octree::GetVoxel(sf::Vector3i position) {
 	//				Break
 	while (dimension > 1) {

-		// So we can be a little bit tricky here and increment our
-		// array index that holds our masks as we build the idx. 
-		// Adding 1 for X, 2 for Y, and 4 for Z
-		int mask_index = 0;
-
-
 		// Do the logic steps to find which sub oct we step down into
 		if (position.x >= (dimension / 2) + state.oct_pos.x) {

 			// Set our voxel position to the (0,0) of the correct oct
 			state.oct_pos.x += (dimension / 2);

-			// increment the mask index and mentioned above
-			mask_index += 1;
-
 			// Set the idx to represent the move
 			state.idx_stack[state.scale] |= idx_set_x_mask;

 		}
 		if (position.y >= (dimension / 2) + state.oct_pos.y) {

-			state.oct_pos.y |= (dimension / 2);
-
-			mask_index += 2;
+			// TODO: Find out why this used the |= operator instead of += like the x and z axes
+			state.oct_pos.y += (dimension / 2);

            // TODO What is up with the XOR operator that was on this one?
 			state.idx_stack[state.scale] |= idx_set_y_mask;
@ -102,12 +92,14 @@ OctState Octree::GetVoxel(sf::Vector3i position) {

 			state.oct_pos.z += (dimension / 2);

-			mask_index += 4;
-
 			state.idx_stack[state.scale] |= idx_set_z_mask;

 		}

+		// The idx bits we just set (1 for X, 2 for Y, 4 for Z) already form the
+		// index into our mask array, so we can just copy it over
+		int mask_index = state.idx_stack[state.scale];
+
 		// Check to see if we are on a valid oct
 		if ((head >> 16) & mask_8[mask_index]) {

@ -367,3 +359,13 @@ bool Octree::Validate(char* data, sf::Vector3i dimensions){
 unsigned int Octree::getDimensions() {
 	return oct_dimensions;
 }
+
+const uint8_t Octree::mask_8[8] = {
+	0x1,  0x2,  0x4,  0x8,
+	0x10, 0x20, 0x40, 0x80
+};
+
+const uint8_t Octree::count_mask_8[8] = {
+	0x1,  0x3,  0x7,  0xF,
+	0x1F, 0x3F, 0x7F, 0xFF
+};