Gained ~10 FPS by moving some octree data to const, and ~0.5 FPS by adding a few more consts to initializers in the kernel

master
MitchellHansen 7 years ago
parent 58ef1da02a
commit c5c65474d6

@ -0,0 +1,27 @@
#pragma once
#include "Pub_Sub.h"
/// Frame timing helper derived from VrEventPublisher.
///
/// Holds the bookkeeping values (elapsed, delta and accumulated time) that
/// do_tick() refreshes on every call.
class FrameWatcher : public VrEventPublisher {

public:

	FrameWatcher();
	~FrameWatcher();

	/// Samples the elapsed time and updates the timing members below.
	void do_tick();

private:

	/// Seconds elapsed since the first call to this function.
	float get_elapsed_time();

	// Size of one accumulator step consumed by do_tick(), in seconds
	float step_size{0.0166f};

	// Not touched by the visible implementation
	double frame_time{0.0};

	// Most recent reading returned by get_elapsed_time()
	double elapsed_time{0.0};

	// Time between the two most recent ticks (clamped inside do_tick())
	double delta_time{0.0};

	// Time banked by do_tick() that has not yet been consumed in step_size slices
	double accumulator_time{0.0};

	// Elapsed-time reading stored at the previous tick
	double current_time{0.0};
};

@ -4,8 +4,6 @@
#include "util.hpp"
#include <tuple>
#define OCT_DIM 128
struct OctState {
int parent_stack_position = 0;
@ -69,8 +67,11 @@ public:
bool Validate(char* data, sf::Vector3i dimensions);
unsigned int getDimensions();
private:
unsigned int oct_dimensions = 1;
std::tuple<uint64_t, uint64_t> GenerationRecursion(
char* data, // raw octree data
sf::Vector3i dimensions, // dimensions of the raw data

@ -1,4 +1,11 @@
// All-zero vector constants kept in the __constant address space.
// The kernel code uses these to initialise vector variables and as the
// right-hand side of vector comparisons (e.g. `ray_dir == zeroed_float3`).
__constant float4 zeroed_float4 = {0.0f, 0.0f, 0.0f, 0.0f};
__constant float3 zeroed_float3 = {0.0f, 0.0f, 0.0f};
__constant float2 zeroed_float2 = {0.0f, 0.0f};
__constant int4 zeroed_int4 = {0, 0, 0, 0};
__constant int3 zeroed_int3 = {0, 0, 0};
__constant int2 zeroed_int2 = {0, 0};
float DistanceBetweenPoints(float3 a, float3 b) {
return fast_distance(a, b);
//return sqrt(pow(a.x - b.x, 2) + pow(a.y - b.y, 2) + pow(a.z - b.z, 2));
@ -31,8 +38,8 @@ float4 white_light(float4 input, float3 light, int3 mask) {
float4 view_light(float4 in_color, float3 light, float4 light_color, float3 view, int3 mask) {
if (all(light == (0.0f,0.0f,0.0f)))
return (0,0,0,0);
if (all(light == zeroed_float3))
return zeroed_float4;
float d = Distance(light) / 100.0f;
d *= d;
@ -65,6 +72,30 @@ int rand(int* seed) // 1 <= *seed < m
return(*seed);
}
// Program-scope lookup constants for the octree traversal in get_oct_vox.

// (X, Y, Z) mask for the idx
// One bit per axis: bit 0 = X, bit 1 = Y, bit 2 = Z.
__constant const uchar idx_set_x_mask = 0x1;
__constant const uchar idx_set_y_mask = 0x2;
__constant const uchar idx_set_z_mask = 0x4;
// Single-bit masks: entry i has only bit i set.
__constant const uchar mask_8[8] = {
0x1, 0x2, 0x4, 0x8,
0x10, 0x20, 0x40, 0x80
};
// Mask for counting the previous valid bits
// Entry i has bits 0..i (inclusive) set.
__constant const uchar count_mask_8[8] = {
0x1, 0x3, 0x7, 0xF,
0x1F, 0x3F, 0x7F, 0xFF
};
// uint64_t manipulation masks
// Bit ranges selected within a 64-bit descriptor (field meanings are taken
// from the names -- NOTE(review): confirm against the octree generator):
//   child_pointer_mask   : bits  0-14
//   far_bit_mask         : bit   15
//   valid_mask           : bits 16-23
//   leaf_mask            : bits 24-31
//   contour_pointer_mask : bits 32-55
//   contour_mask         : bits 56-63
__constant const ulong child_pointer_mask = 0x0000000000007fff;
__constant const ulong far_bit_mask = 0x8000;
__constant const ulong valid_mask = 0xFF0000;
__constant const ulong leaf_mask = 0xFF000000;
__constant const ulong contour_pointer_mask = 0xFFFFFF00000000;
__constant const ulong contour_mask = 0xFF00000000000000;
bool get_oct_vox(
int3 position,
global ulong *octree_descriptor_buffer,
@ -73,31 +104,6 @@ bool get_oct_vox(
global ulong *settings_buffer
){
// (X, Y, Z) mask for the idx
const uchar idx_set_x_mask = 0x1;
const uchar idx_set_y_mask = 0x2;
const uchar idx_set_z_mask = 0x4;
const uchar mask_8[8] = {
0x1, 0x2, 0x4, 0x8,
0x10, 0x20, 0x40, 0x80
};
// Mask for counting the previous valid bits
const uchar count_mask_8[8] = {
0x1, 0x3, 0x7, 0xF,
0x1F, 0x3F, 0x7F, 0xFF
};
// uint64_t manipulation masks
const ulong child_pointer_mask = 0x0000000000007fff;
const ulong far_bit_mask = 0x8000;
const ulong valid_mask = 0xFF0000;
const ulong leaf_mask = 0xFF000000;
const ulong contour_pointer_mask = 0xFFFFFF00000000;
const ulong contour_mask = 0xFF00000000000000;
// push the root node to the parent stack
ulong current_index = *settings_buffer;
ulong head = octree_descriptor_buffer[current_index];
@ -115,8 +121,8 @@ bool get_oct_vox(
parent_stack[parent_stack_position] = head;
// Set our initial dimension and the position at the corner of the oct to keep track of our position
int dimension = 128;
int3 quad_position = (0, 0, 0);
int dimension = 64;
int3 quad_position = zeroed_int3;
// While we are not at the required resolution
// Traverse down by setting the valid/leaf mask to the subvoxel
@ -156,7 +162,9 @@ bool get_oct_vox(
mask_index += 2;
// TODO What is up with the binary operator on this one?
idx_stack[scale] ^= idx_set_y_mask;
// Alright, I switched it over and seems not to have done anything?
// idx_stack[scale] ^= idx_set_y_mask;
idx_stack[scale] |= idx_set_y_mask;
}
if (position.z >= (dimension / 2) + quad_position.z) {
@ -237,7 +245,7 @@ bool cast_light_intersection_ray(
int3 voxel_step = { 1, 1, 1 };
voxel_step *= (ray_dir > 0) - (ray_dir < 0);
if (any(ray_dir == (0.0f,0.0f,0.0f)))
if (any(ray_dir == zeroed_float3))
return false;
// Setup the voxel coords from the camera origin
@ -252,7 +260,7 @@ bool cast_light_intersection_ray(
// for negative values, wrap around the delta_t
intersection_t += delta_t * -convert_float3(isless(intersection_t, 0));
int3 face_mask = { 0, 0, 0 };
int3 face_mask =zeroed_int3;
int length_cutoff = 0;
// Andrew Woo's raycasting algo
@ -344,7 +352,7 @@ __kernel void raycaster(
// Delta T is the units a ray must travel along an axis in order to
// traverse an integer split
if (any(ray_dir == (0.0f,0.0f,0.0f)))
if (any(ray_dir == zeroed_float3))
return;
float3 delta_t = fabs(1.0f / ray_dir);
@ -366,11 +374,11 @@ __kernel void raycaster(
uint bounce_count = 0;
int3 face_mask = { 0, 0, 0 };
int voxel_data = 0;
float3 face_position = (0,0,0);
float4 voxel_color= (0,0,0,0);
float2 tile_face_position = (0,0);
float3 sign = (0,0,0);
float4 first_strike = (0,0,0,0);
float3 face_position = zeroed_float3;
float4 voxel_color= zeroed_float4;
float2 tile_face_position = zeroed_float2;
float3 sign = zeroed_float3;
float4 first_strike = zeroed_float4;
bool shadow_ray = false;
@ -386,35 +394,35 @@ __kernel void raycaster(
if (any(voxel >= *map_dim) || any(voxel < 0)){
voxel_data = 5;
voxel.xyz -= voxel_step.xyz * face_mask.xyz;
first_strike = mix(fog_color, voxel_color, 1.0 - max(distance_traveled / 700.0f, (float)0));
first_strike = mix(fog_color, voxel_color, 1.0f - max(distance_traveled / 700.0f, 0.0f));
}
// If we hit a voxel
// if (voxel.x < 128 && voxel.y < 128 && voxel.z < 128){
// if (get_oct_vox(
// voxel,
// octree_descriptor_buffer,
// octree_attachment_lookup_buffer,
// octree_attachment_buffer,
// settings_buffer
// )){
// voxel_data = 1;
// } else {
// voxel_data = 0;
// }
// } else {
if (voxel.x < 64 && voxel.y < 64 && voxel.z < 64){
if (get_oct_vox(
voxel,
octree_descriptor_buffer,
octree_attachment_lookup_buffer,
octree_attachment_buffer,
settings_buffer
)){
voxel_data = 5;
} else {
voxel_data = 0;
}
} else {
voxel_data = map[voxel.x + (*map_dim).x * (voxel.y + (*map_dim).z * (voxel.z))];
//}
}
if (voxel_data != 0) {
// Determine where on the 2d plane the ray intersected
face_position = (float3)(0);
tile_face_position = (float2)(0);
face_position = zeroed_float3;
tile_face_position = zeroed_float2;
sign = (float3)(1.0f, 1.0f, 1.0f);
// First determine the percent of the way the ray is towards the next intersection_t
@ -517,7 +525,7 @@ __kernel void raycaster(
float3 hit_pos = convert_float3(voxel) + face_position;
ray_dir = normalize((float3)(lights[4], lights[5], lights[6]) - hit_pos);
if (any(ray_dir == (0.0f,0.0f,0.0f)))
if (any(ray_dir == zeroed_float3))
return;
voxel -= voxel_step * face_mask;
@ -539,13 +547,13 @@ __kernel void raycaster(
convert_int2((float2)(3, 4) * convert_float2(*atlas_dim / *tile_dim))
).xyz/2;
voxel_color.w += 0.3f;
max_distance = 500;
voxel_color.w -= 0.3f;
max_distance = 700;
distance_traveled = 0;
float3 hit_pos = convert_float3(voxel) + face_position;
ray_dir *= sign;
if (any(ray_dir == (0.0f,0.0f,0.0f)))
if (any(ray_dir == zeroed_float3))
return;
voxel -= voxel_step * face_mask;

@ -23,9 +23,6 @@ Application::~Application() {
bool Application::init_clcaster() {
//Map _map(32);
//return 0;
// Start up the raycaster
raycaster = std::make_shared<CLCaster>();
if (!raycaster->init())
@ -38,7 +35,9 @@ bool Application::init_clcaster() {
// Send the data to the GPU
raycaster->assign_map(map);
octree = std::make_shared<Map>(128, map.get());
// Init the raycaster with a specified dimension and a pointer to the source
// array style data
octree = std::make_shared<Map>(64, map.get());
raycaster->assign_octree(octree);

@ -0,0 +1,47 @@
#include "FrameWatcher.h"
#include <chrono>
// Nothing to set up here: every member carries an in-class default initializer.
FrameWatcher::FrameWatcher() = default;
// No resources are owned directly, so the destructor has no work to do.
FrameWatcher::~FrameWatcher() = default;
// Advances the frame timer by one tick.
//
// Reads the elapsed time, derives the time since the previous tick, clamps
// that delta to 0.2 seconds, banks it in the accumulator and then consumes
// the accumulator in step_size slices.
void FrameWatcher::do_tick() {

	elapsed_time = get_elapsed_time();
	delta_time = elapsed_time - current_time;
	current_time = elapsed_time;

	// Cap the delta so a long stall cannot queue up an unbounded number of steps
	if (delta_time > 0.2f)
		delta_time = 0.2f;

	accumulator_time += delta_time;

	// Consume every whole step that has been banked. The previous condition,
	// (accumulator_time - step_size) >= step_size, only fired once two full
	// steps were available and so always left one unconsumed step behind.
	while (accumulator_time >= step_size) {

		accumulator_time -= step_size;

		// ==== DELTA TIME LOCKED ====
	}
}
// Returns the number of seconds that have passed since this function was
// first called (the first call therefore returns approximately zero).
//
// The reference point is shared by all FrameWatcher instances because it is
// a function-local static.
float FrameWatcher::get_elapsed_time() {

	// steady_clock is monotonic; system_clock may be adjusted while running,
	// which would make the elapsed time jump or even go negative.
	// The static initializer runs exactly once, on the first call.
	static const std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();

	const std::chrono::duration<double> elapsed = std::chrono::steady_clock::now() - start;

	return static_cast<float>(elapsed.count());
}

@ -26,7 +26,7 @@
* - Octree, Map interface with the GPU
* - Octree, Map refactoring
* - Separate Application stages into areas that make sense
* -
* - Saving, loading of RLE voxel data and raw oct data
*/
#include "Application.h"

@ -66,7 +66,7 @@ bool Map::test_oct_arr_traversal(sf::Vector3i dimensions) {
}
// Stores val at pos in the flat voxel array.
//
// The array is indexed as x + dim * (y + dim * z), where dim is the dimension
// reported by the octree. The earlier write that used the compile-time
// OCT_DIM stride has been removed: when the octree dimension differs from
// OCT_DIM it wrote to the wrong element and could index past the buffer.
void Map::setVoxel(sf::Vector3i pos, int val) {
	const unsigned int dim = octree.getDimensions();
	voxel_data[pos.x + dim * (pos.y + dim * pos.z)] = val;
}
char Map::getVoxel(sf::Vector3i pos){

@ -11,13 +11,15 @@ Octree::Octree() {
void Octree::Generate(char* data, sf::Vector3i dimensions) {
oct_dimensions = dimensions.x;
// Launch the recursive generator at (0,0,0) as the first point
// and the octree dimension as the initial block size
std::tuple<uint64_t, uint64_t> root_node = GenerationRecursion(data, dimensions, sf::Vector3i(0, 0, 0), OCT_DIM/2);
std::tuple<uint64_t, uint64_t> root_node = GenerationRecursion(data, dimensions, sf::Vector3i(0, 0, 0), oct_dimensions/2);
// ========= DEBUG ==============
PrettyPrintUINT64(std::get<0>(root_node), &output_stream);
output_stream << " " << OCT_DIM << " " << counter++ << std::endl;
output_stream << " " << oct_dimensions << " " << counter++ << std::endl;
// ==============================
// set the root nodes relative pointer to 1 because the next element will be the top of the tree, and push to the stack
@ -51,7 +53,7 @@ OctState Octree::GetVoxel(sf::Vector3i position) {
state.parent_stack[state.parent_stack_position] = head;
// Set our initial dimension and the position at the corner of the oct to keep track of our position
int dimension = OCT_DIM;
int dimension = oct_dimensions;
sf::Vector3i quad_position(0, 0, 0);
// While we are not at the required resolution
@ -313,7 +315,7 @@ std::tuple<uint64_t, uint64_t> Octree::GenerationRecursion(char* data, sf::Vecto
}
// Reads the voxel at position from the flat array data.
//
// Indexing is x + dim * (y + dim * z) with dim = oct_dimensions, the value
// recorded by Generate(). The previous OCT_DIM-based return used a fixed
// stride that is wrong whenever the data is not OCT_DIM wide, and it made the
// correct return unreachable.
// The dimensions parameter is currently unused but kept for interface
// compatibility.
char Octree::get1DIndexedVoxel(char* data, sf::Vector3i dimensions, sf::Vector3i position) {
	return data[position.x + oct_dimensions * (position.y + oct_dimensions * position.z)];
}
bool Octree::Validate(char* data, sf::Vector3i dimensions){
@ -343,3 +345,7 @@ bool Octree::Validate(char* data, sf::Vector3i dimensions){
return true;
}
unsigned int Octree::getDimensions() {
return oct_dimensions;
}

Loading…
Cancel
Save