Optimizing, fixing things in the kernel. More oct work

MitchellHansen 8 years ago
parent 8894d5e3a7
commit 76189ef0b4

@ -77,16 +77,10 @@ public:
static const uint8_t idx_set_z_mask = 0x4;
// Mask for checking if valid or leaf
const uint8_t mask_8[8] = {
0x1, 0x2, 0x4, 0x8,
0x10, 0x20, 0x40, 0x80
static const uint8_t mask_8[8];
// Mask for counting the previous valid bits
const uint8_t count_mask_8[8] = {
0x1, 0x3, 0x7, 0xF,
0x1F, 0x3F, 0x7F, 0xFF
static const uint8_t count_mask_8[8];
// uint64_t manipulation masks

@ -16,6 +16,7 @@ __constant int2 zeroed_int2 = {0, 0};
__constant const uchar idx_set_x_mask = 0x1;
__constant const uchar idx_set_y_mask = 0x2;
__constant const uchar idx_set_z_mask = 0x4;
__constant const uchar idx_set_mask = {0x1, 0x2, 0x4};
__constant const uchar mask_8[8] = {
0x1, 0x2, 0x4, 0x8,
@ -113,7 +114,6 @@ bool get_oct_vox(
ulong current_index = *settings_buffer;
ulong head = octree_descriptor_buffer[current_index];
uint parent_stack_position = 0;
ulong parent_stack[32];
uchar scale = 0;
@ -123,7 +123,7 @@ bool get_oct_vox(
bool found = false;
parent_stack[parent_stack_position] = head;
parent_stack[scale] = head;
// Set our initial dimension and the position at the corner of the oct to keep track of our position
int dimension = OCTDIM;
@ -139,40 +139,37 @@ bool get_oct_vox(
// No?
// Break
while (dimension > 1) {
while (dimension > 64) {
// So we can be a little bit tricky here and increment our
// array index that holds our masks as we build the idx.
// Adding 1 for X, 2 for Y, and 4 for Z
int mask_index = 0;
idx_stack[scale] = 0;
// Do the logic steps to find which sub oct we step down into
// Do the logic steps to find which sub oct we step down into
if (position.x >= (dimension / 2) + quad_position.x) {
// Set our voxel position to the (0,0) of the correct oct
quad_position.x += (dimension / 2);
// Increment the mask index as mentioned above
mask_index += 1;
// Set the idx to represent the move
idx_stack[scale] |= idx_set_x_mask;
if (position.y >= (dimension / 2) + quad_position.y) {
quad_position.y |= (dimension / 2);
mask_index += 2;
quad_position.y += (dimension / 2);
idx_stack[scale] |= idx_set_y_mask;
if (position.z >= (dimension / 2) + quad_position.z) {
quad_position.z += (dimension / 2);
mask_index += 4;
idx_stack[scale] |= idx_set_z_mask;
int mask_index = idx_stack[scale];
// Check to see if we are on a valid oct
if ((head >> 16) & mask_8[mask_index]) {
@ -205,9 +202,8 @@ bool get_oct_vox(
head = octree_descriptor_buffer[current_index];
// Increment the parent stack position and put the new oct node as the parent
parent_stack[parent_stack_position] = head;
parent_stack[scale] = head;
else {
@ -285,7 +281,13 @@ __kernel void raycaster(
// for all 3 axes XYZ. We take the full positive cardinality when
// subtracting the floor, so we must transfer the sign over from
// the voxel step
float3 intersection_t = delta_t * ((*cam_pos) - ceil(*cam_pos)) * convert_float3(voxel_step);
// handle the case where we're smack on 0 for the camera position
float modifier = 0.0f;
if (any(((*cam_pos) - ceil(*cam_pos) == 0.0f)))
modifier = 0.000001f;
float3 intersection_t = delta_t * ((*cam_pos) - ceil(*cam_pos) + modifier) * convert_float3(voxel_step);
// When we transfer the sign over, we get the correct direction of
// the offset, but we merely transposed over the value instead of mirroring
@ -309,7 +311,7 @@ __kernel void raycaster(
bool shadow_ray = false;
// Andrew Woo's raycasting algo
while (distance_traveled < max_distance && bounce_count < 4) {
while (distance_traveled < max_distance && bounce_count < 2) {
// Fancy no branch version of the logic step
face_mask = intersection_t.xyz <= min(intersection_t.yzx, intersection_t.zxy);
@ -326,26 +328,26 @@ __kernel void raycaster(
constant int vox_dim = OCTDIM;
// If we hit a voxel
if (voxel.x < vox_dim && voxel.y < vox_dim && voxel.z < vox_dim){
if (get_oct_vox(
voxel_data = 5;
} else {
voxel_data = 0;
} else {
// // If we hit a voxel
// if (voxel.x < vox_dim && voxel.y < vox_dim && voxel.z < vox_dim){
// if (get_oct_vox(
// voxel,
// octree_descriptor_buffer,
// octree_attachment_lookup_buffer,
// octree_attachment_buffer,
// settings_buffer
// )){
// voxel_data = 5;
// } else {
// voxel_data = 0;
// }
// } else {
voxel_data = map[voxel.x + (*map_dim).x * (voxel.y + (*map_dim).z * (voxel.z))];
if (voxel_data != 0) {
if (voxel_data == 5 || voxel_data == 6) {
// Determine where on the 2d plane the ray intersected
face_position = zeroed_float3;
tile_face_position = zeroed_float2;
@ -415,7 +417,7 @@ __kernel void raycaster(
// Now we detect what type of voxel we intersected and decide whether
// to bend the ray, send out a light intersection ray, or add texture color
if (voxel_data == 5 && !shadow_ray){
shadow_ray = true;
@ -436,7 +438,6 @@ __kernel void raycaster(
fog_distance = distance_traveled;
max_distance = distance_traveled + DistanceBetweenPoints(convert_float3(voxel), (float3)(lights[4], lights[5], lights[6]));
float3 hit_pos = convert_float3(voxel) + face_position;
ray_dir = normalize((float3)(lights[4], lights[5], lights[6]) - hit_pos);
if (any(ray_dir == zeroed_float3))
@ -459,8 +460,6 @@ __kernel void raycaster(
voxel_color.w -= 0.0f;
//max_distance += 200;
float3 hit_pos = convert_float3(voxel) + face_position;
ray_dir *= sign;
@ -471,8 +470,6 @@ __kernel void raycaster(
voxel_step = ( 1, 1, 1 );
voxel_step *= (ray_dir > 0) - (ray_dir < 0);
//voxel = convert_int3(hit_pos);
delta_t = fabs(1.0f / ray_dir);
intersection_t = delta_t * ((hit_pos)-floor(hit_pos)) * convert_float3(voxel_step);
intersection_t += delta_t * -convert_float3(isless(intersection_t, 0));

@ -52,17 +52,19 @@ Map::Map(uint32_t dimensions, Old_Map* array_map) {
bool Map::test_oct_arr_traversal(sf::Vector3i dimensions) {
sf::Vector2f cam_dir(0.95, 0.81);
sf::Vector3f cam_pos(10.5, 10.5, 10.5);
std::vector<std::tuple<sf::Vector3i, char>> list1 = CastRayCharArray(voxel_data, &dimensions, &cam_dir, &cam_pos);
std::vector<std::tuple<sf::Vector3i, char>> list2 = CastRayOctree(&octree, &dimensions, &cam_dir, &cam_pos);
if (list1 != list2) {
return false;
} else {
return true;
//sf::Vector2f cam_dir(0.95, 0.81);
//sf::Vector3f cam_pos(10.5, 10.5, 10.5);
//std::vector<std::tuple<sf::Vector3i, char>> list1 = CastRayCharArray(voxel_data, &dimensions, &cam_dir, &cam_pos);
//std::vector<std::tuple<sf::Vector3i, char>> list2 = CastRayOctree(&octree, &dimensions, &cam_dir, &cam_pos);
//if (list1 != list2) {
// return false;
//} else {
// return true;
return false;
void Map::setVoxel(sf::Vector3i pos, int val) {
@ -117,12 +119,6 @@ std::vector<std::tuple<sf::Vector3i, char>> Map::CastRayCharArray(
voxel_step.z *= (ray_dir.z > 0) - (ray_dir.z < 0);
// =================================================================================================
// =================================================================================================
// =================================================================================================
// =================================================================================================
// Delta T is the units a ray must travel along an axis in order to
// traverse an integer split
sf::Vector3f delta_t(
@ -196,7 +192,7 @@ std::vector<std::tuple<sf::Vector3i, char>> Map::CastRayOctree(
) {
// Setup the voxel coords from the camera origin
sf::Vector3i voxel(*cam_pos);
sf::Vector3i voxel(0,0,0);
// This function when passed an "air" voxel will return as far down
@ -238,9 +234,8 @@ std::vector<std::tuple<sf::Vector3i, char>> Map::CastRayOctree(
voxel_step.z *= (ray_dir.z > 0) - (ray_dir.z < 0);
// Set the jump multiplier based on the traversal state vs the log base 2 of the map's dimensions
int jump_power = 1;
if (log2(map_dim->x) != traversal_state.scale)
jump_power = pow(2, traversal_state.scale);
int jump_power = log2(map_dim->x) - traversal_state.scale;
// Delta T is the units a ray must travel along an axis in order to
// traverse an integer split
@ -316,17 +311,21 @@ std::vector<std::tuple<sf::Vector3i, char>> Map::CastRayOctree(
traversal_state.idx_stack[traversal_state.scale] ^= this_face_mask;
int mask_index = traversal_state.idx_stack[traversal_state.scale];
// Check to see if the idx increased or decreased
// If it decreased
// Pop up the stack until the oct that the ray is within is valid.
while (traversal_state.idx_stack[traversal_state.scale] < prev_val) {
// Pop up the stack until the idx flip is valid and we have landed on a valid oct
while (traversal_state.idx_stack[traversal_state.scale] < prev_val ||
!((traversal_state.parent_stack[traversal_state.parent_stack_position] >> 16) & Octree::mask_8[mask_index])
) {
jump_power *= 2;
// Keep track of the 0th edge of our current oct
traversal_state.oct_pos.x = floor(voxel.x / 2) * jump_power;
traversal_state.oct_pos.y = floor(voxel.x / 2) * jump_power;
traversal_state.oct_pos.z = floor(voxel.x / 2) * jump_power;
// Clear and pop the idx stack
traversal_state.idx_stack[traversal_state.scale] = 0;
@ -345,10 +344,21 @@ std::vector<std::tuple<sf::Vector3i, char>> Map::CastRayOctree(
// Apply the face mask to the new idx for the while check
traversal_state.idx_stack[traversal_state.scale] ^= this_face_mask;
mask_index = traversal_state.idx_stack[traversal_state.scale];
// Check to see if we are on a valid oct
//if ((traversal_state.parent_stack[traversal_state.parent_stack_position] >> 16) & Octree::mask_8[mask_index]) {
// // Check to see if it is a leaf
// if ((traversal_state.parent_stack[traversal_state.parent_stack_position] >> 24) & Octree::mask_8[mask_index]) {
// // If it is, then we cannot traverse further as CP's won't have been generated
// state.found = 1;
// return state;
// }
// Check to see if we are on top of a valid branch
// Traverse down to the lowest valid oct that the ray is within

@ -69,30 +69,20 @@ OctState Octree::GetVoxel(sf::Vector3i position) {
// Break
while (dimension > 1) {
// So we can be a little bit tricky here and increment our
// array index that holds our masks as we build the idx.
// Adding 1 for X, 2 for Y, and 4 for Z
int mask_index = 0;
// Do the logic steps to find which sub oct we step down into
if (position.x >= (dimension / 2) + state.oct_pos.x) {
// Set our voxel position to the (0,0) of the correct oct
state.oct_pos.x += (dimension / 2);
// Increment the mask index as mentioned above
mask_index += 1;
// Set the idx to represent the move
state.idx_stack[state.scale] |= idx_set_x_mask;
if (position.y >= (dimension / 2) + state.oct_pos.y) {
state.oct_pos.y |= (dimension / 2);
mask_index += 2;
// TODO What the hell is going on with the or operator on this one!??!?!?!
state.oct_pos.y += (dimension / 2);
// TODO What is up with the XOR operator that was on this one?
state.idx_stack[state.scale] |= idx_set_y_mask;
@ -102,12 +92,14 @@ OctState Octree::GetVoxel(sf::Vector3i position) {
state.oct_pos.z += (dimension / 2);
mask_index += 4;
state.idx_stack[state.scale] |= idx_set_z_mask;
// Our count mask matches the way we index our idx so we can just
// copy it over
int mask_index = state.idx_stack[state.scale];
// Check to see if we are on a valid oct
if ((head >> 16) & mask_8[mask_index]) {
@ -367,3 +359,13 @@ bool Octree::Validate(char* data, sf::Vector3i dimensions){
unsigned int Octree::getDimensions() {
return oct_dimensions;
const uint8_t Octree::mask_8[8] = {
0x1, 0x2, 0x4, 0x8,
0x10, 0x20, 0x40, 0x80
const uint8_t Octree::count_mask_8[8] = {
0x1, 0x3, 0x7, 0xF,
0x1F, 0x3F, 0x7F, 0xFF
