From c5c65474d6a083eb179d882539e60f3c0ad54af1 Mon Sep 17 00:00:00 2001 From: MitchellHansen Date: Sat, 7 Oct 2017 21:32:22 -0700 Subject: [PATCH] ~10 FPS from moving some oct stuff to const, ~0.5 fps from adding a few more consts to initializers in the kernel --- include/FrameWatcher.h | 27 ++++++++ include/map/Octree.h | 5 +- kernels/ray_caster_kernel.cl | 126 +++++++++++++++++++---------------- src/Application.cpp | 7 +- src/FrameWatcher.cpp | 47 +++++++++++++ src/main.cpp | 2 +- src/map/Map.cpp | 2 +- src/map/Octree.cpp | 14 ++-- 8 files changed, 159 insertions(+), 71 deletions(-) create mode 100644 include/FrameWatcher.h create mode 100644 src/FrameWatcher.cpp diff --git a/include/FrameWatcher.h b/include/FrameWatcher.h new file mode 100644 index 0000000..7fac42c --- /dev/null +++ b/include/FrameWatcher.h @@ -0,0 +1,27 @@ +#pragma once +#include "Pub_Sub.h" + +class FrameWatcher : public VrEventPublisher{ + + + + +public: + FrameWatcher(); + ~FrameWatcher(); + + void do_tick(); + +private: + + float get_elapsed_time(); + + float step_size = 0.0166f; + double frame_time = 0.0; + double elapsed_time = 0.0; + double delta_time = 0.0; + double accumulator_time = 0.0; + double current_time = 0.0; + + +}; diff --git a/include/map/Octree.h b/include/map/Octree.h index 1d35847..d3e7a7a 100644 --- a/include/map/Octree.h +++ b/include/map/Octree.h @@ -4,8 +4,6 @@ #include "util.hpp" #include -#define OCT_DIM 128 - struct OctState { int parent_stack_position = 0; @@ -69,8 +67,11 @@ public: bool Validate(char* data, sf::Vector3i dimensions); + unsigned int getDimensions(); private: + unsigned int oct_dimensions = 1; + std::tuple GenerationRecursion( char* data, // raw octree data sf::Vector3i dimensions, // dimensions of the raw data diff --git a/kernels/ray_caster_kernel.cl b/kernels/ray_caster_kernel.cl index 9c5986e..5aa021a 100644 --- a/kernels/ray_caster_kernel.cl +++ b/kernels/ray_caster_kernel.cl @@ -1,4 +1,11 @@ +__constant float4 zeroed_float4 = {0.0f, 0.0f, 0.0f, 0.0f}; +__constant float3 zeroed_float3 = {0.0f, 0.0f, 0.0f}; +__constant float2 zeroed_float2 = {0.0f, 0.0f}; +__constant int4 zeroed_int4 = {0, 0, 0, 0}; +__constant int3 zeroed_int3 = {0, 0, 0}; +__constant int2 zeroed_int2 = {0, 0}; + float DistanceBetweenPoints(float3 a, float3 b) { return fast_distance(a, b); //return sqrt(pow(a.x - b.x, 2) + pow(a.y - b.y, 2) + pow(a.z - b.z, 2)); @@ -31,8 +38,8 @@ float4 white_light(float4 input, float3 light, int3 mask) { float4 view_light(float4 in_color, float3 light, float4 light_color, float3 view, int3 mask) { - if (all(light == (0.0f,0.0f,0.0f))) - return (0,0,0,0); + if (all(light == zeroed_float3)) + return zeroed_float4; float d = Distance(light) / 100.0f; d *= d; @@ -65,6 +72,30 @@ int rand(int* seed) // 1 <= *seed < m return(*seed); } + // (X, Y, Z) mask for the idx +__constant const uchar idx_set_x_mask = 0x1; +__constant const uchar idx_set_y_mask = 0x2; +__constant const uchar idx_set_z_mask = 0x4; + +__constant const uchar mask_8[8] = { + 0x1, 0x2, 0x4, 0x8, + 0x10, 0x20, 0x40, 0x80 +}; + +// Mask for counting the previous valid bits +__constant const uchar count_mask_8[8] = { + 0x1, 0x3, 0x7, 0xF, + 0x1F, 0x3F, 0x7F, 0xFF +}; + +// uint64_t manipulation masks +__constant const ulong child_pointer_mask = 0x0000000000007fff; +__constant const ulong far_bit_mask = 0x8000; +__constant const ulong valid_mask = 0xFF0000; +__constant const ulong leaf_mask = 0xFF000000; +__constant const ulong contour_pointer_mask = 0xFFFFFF00000000; +__constant const ulong contour_mask = 0xFF00000000000000; + bool get_oct_vox( int3 position, global ulong *octree_descriptor_buffer, @@ -73,31 +104,6 @@ bool get_oct_vox( global ulong *settings_buffer ){ - // (X, Y, Z) mask for the idx - const uchar idx_set_x_mask = 0x1; - const uchar idx_set_y_mask = 0x2; - const uchar idx_set_z_mask = 0x4; - - const uchar mask_8[8] = { - 0x1, 0x2, 0x4, 0x8, - 0x10, 0x20, 0x40, 0x80 - }; - - // Mask for counting the previous valid bits - const uchar count_mask_8[8] = { - 0x1, 0x3, 0x7, 0xF, - 0x1F, 0x3F, 0x7F, 0xFF - }; - - // uint64_t manipulation masks - const ulong child_pointer_mask = 0x0000000000007fff; - const ulong far_bit_mask = 0x8000; - const ulong valid_mask = 0xFF0000; - const ulong leaf_mask = 0xFF000000; - const ulong contour_pointer_mask = 0xFFFFFF00000000; - const ulong contour_mask = 0xFF00000000000000; - - // push the root node to the parent stack ulong current_index = *settings_buffer; ulong head = octree_descriptor_buffer[current_index]; @@ -115,8 +121,8 @@ bool get_oct_vox( parent_stack[parent_stack_position] = head; // Set our initial dimension and the position at the corner of the oct to keep track of our position - int dimension = 128; - int3 quad_position = (0, 0, 0); + int dimension = 64; + int3 quad_position = zeroed_int3; // While we are not at the required resolution // Traverse down by setting the valid/leaf mask to the subvoxel @@ -156,7 +162,9 @@ bool get_oct_vox( mask_index += 2; // TODO What is up with the binary operator on this one? - idx_stack[scale] ^= idx_set_y_mask; + // Alright, I switched it over and seems not to have done anything? + // idx_stack[scale] ^= idx_set_y_mask; + idx_stack[scale] |= idx_set_y_mask; } if (position.z >= (dimension / 2) + quad_position.z) { @@ -237,7 +245,7 @@ bool cast_light_intersection_ray( int3 voxel_step = { 1, 1, 1 }; voxel_step *= (ray_dir > 0) - (ray_dir < 0); - if (any(ray_dir == (0.0f,0.0f,0.0f))) + if (any(ray_dir == zeroed_float3)) return false; // Setup the voxel coords from the camera origin @@ -252,7 +260,7 @@ bool cast_light_intersection_ray( // for negative values, wrap around the delta_t intersection_t += delta_t * -convert_float3(isless(intersection_t, 0)); - int3 face_mask = { 0, 0, 0 }; + int3 face_mask =zeroed_int3; int length_cutoff = 0; // Andrew Woo's raycasting algo @@ -344,7 +352,7 @@ __kernel void raycaster( // Delta T is the units a ray must travel along an axis in order to // traverse an integer split - if (any(ray_dir == (0.0f,0.0f,0.0f))) + if (any(ray_dir == zeroed_float3)) return; float3 delta_t = fabs(1.0f / ray_dir); @@ -366,11 +374,11 @@ __kernel void raycaster( uint bounce_count = 0; int3 face_mask = { 0, 0, 0 }; int voxel_data = 0; - float3 face_position = (0,0,0); - float4 voxel_color= (0,0,0,0); - float2 tile_face_position = (0,0); - float3 sign = (0,0,0); - float4 first_strike = (0,0,0,0); + float3 face_position = zeroed_float3; + float4 voxel_color= zeroed_float4; + float2 tile_face_position = zeroed_float2; + float3 sign = zeroed_float3; + float4 first_strike = zeroed_float4; bool shadow_ray = false; @@ -386,35 +394,35 @@ __kernel void raycaster( if (any(voxel >= *map_dim) || any(voxel < 0)){ voxel_data = 5; voxel.xyz -= voxel_step.xyz * face_mask.xyz; - first_strike = mix(fog_color, voxel_color, 1.0 - max(distance_traveled / 700.0f, (float)0)); + first_strike = mix(fog_color, voxel_color, 1.0f - max(distance_traveled / 700.0f, 0.0f)); } // If we hit a voxel - // if (voxel.x < 128 && voxel.y < 128 && voxel.z < 128){ - // if (get_oct_vox( - // voxel, - // octree_descriptor_buffer, - // octree_attachment_lookup_buffer, - // octree_attachment_buffer, - // settings_buffer - // )){ - // voxel_data = 1; - // } else { - // voxel_data = 0; - // } - // } else { + if (voxel.x < 64 && voxel.y < 64 && voxel.z < 64){ + if (get_oct_vox( + voxel, + octree_descriptor_buffer, + octree_attachment_lookup_buffer, + octree_attachment_buffer, + settings_buffer + )){ + voxel_data = 5; + } else { + voxel_data = 0; + } + } else { voxel_data = map[voxel.x + (*map_dim).x * (voxel.y + (*map_dim).z * (voxel.z))]; - //} + } if (voxel_data != 0) { // Determine where on the 2d plane the ray intersected - face_position = (float3)(0); - tile_face_position = (float2)(0); + face_position = zeroed_float3; + tile_face_position = zeroed_float2; sign = (float3)(1.0f, 1.0f, 1.0f); // First determine the percent of the way the ray is towards the next intersection_t @@ -517,7 +525,7 @@ __kernel void raycaster( float3 hit_pos = convert_float3(voxel) + face_position; ray_dir = normalize((float3)(lights[4], lights[5], lights[6]) - hit_pos); - if (any(ray_dir == (0.0f,0.0f,0.0f))) + if (any(ray_dir == zeroed_float3)) return; voxel -= voxel_step * face_mask; @@ -539,13 +547,13 @@ __kernel void raycaster( convert_int2((float2)(3, 4) * convert_float2(*atlas_dim / *tile_dim)) ).xyz/2; - voxel_color.w += 0.3f; - max_distance = 500; + voxel_color.w -= 0.3f; + max_distance = 700; distance_traveled = 0; float3 hit_pos = convert_float3(voxel) + face_position; ray_dir *= sign; - if (any(ray_dir == (0.0f,0.0f,0.0f))) + if (any(ray_dir == zeroed_float3)) return; voxel -= voxel_step * face_mask; diff --git a/src/Application.cpp b/src/Application.cpp index b1d8418..dd5b115 100644 --- a/src/Application.cpp +++ b/src/Application.cpp @@ -23,9 +23,6 @@ Application::~Application() { bool Application::init_clcaster() { - //Map _map(32); - //return 0; - // Start up the raycaster raycaster = std::make_shared(); if (!raycaster->init()) @@ -38,7 +35,9 @@ bool Application::init_clcaster() { // Send the data to the GPU raycaster->assign_map(map); - octree = std::make_shared(128, map.get()); + // Init the raycaster with a specified dimension and a pointer to the source + // array style data + octree = std::make_shared(64, map.get()); raycaster->assign_octree(octree); diff --git a/src/FrameWatcher.cpp b/src/FrameWatcher.cpp new file mode 100644 index 0000000..f90a9e3 --- /dev/null +++ b/src/FrameWatcher.cpp @@ -0,0 +1,47 @@ +#include "FrameWatcher.h" +#include + +FrameWatcher::FrameWatcher() { + +} + +FrameWatcher::~FrameWatcher() +{ + +} + +void FrameWatcher::do_tick() { + + + elapsed_time = get_elapsed_time(); + delta_time = elapsed_time - current_time; + current_time = elapsed_time; + + if (delta_time > 0.2f) + delta_time = 0.2f; + + accumulator_time += delta_time; + + while ((accumulator_time - step_size) >= step_size) { + accumulator_time -= step_size; + + // ==== DELTA TIME LOCKED ==== + } + +} + +float FrameWatcher::get_elapsed_time() { + + static std::chrono::time_point start; + static bool started = false; + + if (!started) { + start = std::chrono::system_clock::now(); + started = true; + } + + std::chrono::time_point now = std::chrono::system_clock::now(); + std::chrono::duration elapsed_time = now - start; + return static_cast(elapsed_time.count()); +} + diff --git a/src/main.cpp b/src/main.cpp index 3a57ea4..4abba57 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -26,7 +26,7 @@ * - Octree, Map interface with the GPU * - Octree, Map refactoring * - Separate Application stages into areas that make sense - * - + * - Saving, loading of RLE voxel data and raw oct data */ #include "Application.h" diff --git a/src/map/Map.cpp b/src/map/Map.cpp index 904201e..2b8f19a 100644 --- a/src/map/Map.cpp +++ b/src/map/Map.cpp @@ -66,7 +66,7 @@ bool Map::test_oct_arr_traversal(sf::Vector3i dimensions) { } void Map::setVoxel(sf::Vector3i pos, int val) { - voxel_data[pos.x + OCT_DIM * (pos.y + OCT_DIM * pos.z)] = val; + voxel_data[pos.x + octree.getDimensions() * (pos.y + octree.getDimensions() * pos.z)] = val; } char Map::getVoxel(sf::Vector3i pos){ diff --git a/src/map/Octree.cpp b/src/map/Octree.cpp index 3326839..1a05fe4 100644 --- a/src/map/Octree.cpp +++ b/src/map/Octree.cpp @@ -11,13 +11,15 @@ Octree::Octree() { void Octree::Generate(char* data, sf::Vector3i dimensions) { + oct_dimensions = dimensions.x; + // Launch the recursive generator at (0,0,0) as the first point // and the octree dimension as the initial block size - std::tuple root_node = GenerationRecursion(data, dimensions, sf::Vector3i(0, 0, 0), OCT_DIM/2); + std::tuple root_node = GenerationRecursion(data, dimensions, sf::Vector3i(0, 0, 0), oct_dimensions/2); // ========= DEBUG ============== PrettyPrintUINT64(std::get<0>(root_node), &output_stream); - output_stream << " " << OCT_DIM << " " << counter++ << std::endl; + output_stream << " " << oct_dimensions << " " << counter++ << std::endl; // ============================== // set the root nodes relative pointer to 1 because the next element will be the top of the tree, and push to the stack @@ -51,7 +53,7 @@ OctState Octree::GetVoxel(sf::Vector3i position) { state.parent_stack[state.parent_stack_position] = head; // Set our initial dimension and the position at the corner of the oct to keep track of our position - int dimension = OCT_DIM; + int dimension = oct_dimensions; sf::Vector3i quad_position(0, 0, 0); // While we are not at the required resolution @@ -313,7 +315,7 @@ std::tuple Octree::GenerationRecursion(char* data, sf::Vecto } char Octree::get1DIndexedVoxel(char* data, sf::Vector3i dimensions, sf::Vector3i position) { - return data[position.x + OCT_DIM * (position.y + OCT_DIM * position.z)]; + return data[position.x + oct_dimensions * (position.y + oct_dimensions * position.z)]; } bool Octree::Validate(char* data, sf::Vector3i dimensions){ @@ -343,3 +345,7 @@ bool Octree::Validate(char* data, sf::Vector3i dimensions){ return true; } + +unsigned int Octree::getDimensions() { + return oct_dimensions; +}