Refactored and commented the Hardware Caster. Cleaned up many small things

master
MitchellHansen 8 years ago
parent 50c6d68944
commit 3aaffce566

@ -1,11 +1,12 @@
#pragma once
#include <SFML/System/Vector3.hpp>
#include <SFML/System/Vector2.hpp>
#include <list>
#include <numeric>
#include "util.hpp"
#include "Pub_Sub.h"
#include "raycaster/RayCaster.h"
#include <list>
#include "raycaster/Hardware_Caster.h"
#include "LightHandle.h"
struct LightPrototype {

@ -8,12 +8,12 @@
#include <iostream>
#include <functional>
#include <cmath>
#include "util.hpp"
#include <deque>
#include <unordered_map>
#include <bitset>
#include <cstring>
#include <queue>
#include "util.hpp"
#define _USE_MATH_DEFINES
#include <math.h>
@ -115,7 +115,7 @@ public:
uint8_t idx_stack[32] = {0};
// Init the idx stack
std::vector<std::bitset<3>> scale_stack(log2(OCT_DIM));
std::vector<std::bitset<3>> scale_stack(static_cast<uint64_t>(log2(OCT_DIM)));
// Set our initial dimension and the position we use to keep track of which oct we're in
int dimension = OCT_DIM;
@ -142,7 +142,7 @@ public:
idx_stack[scale] |= idx_set_x_mask;
// Debug
scale_stack.at(log2(OCT_DIM) - log2(dimension)).set(0);
scale_stack.at(static_cast<uint64_t>(log2(OCT_DIM) - log2(dimension))).set(0);
}
if (position.y >= (dimension / 2) + quad_position.y) {
@ -152,7 +152,7 @@ public:
mask_index += 2;
idx_stack[scale] ^= idx_set_y_mask;
scale_stack.at(log2(OCT_DIM) - log2(dimension)).set(1);
scale_stack.at(static_cast<uint64_t>(log2(OCT_DIM) - log2(dimension))).set(1);
}
if (position.z >= (dimension / 2) + quad_position.z) {
@ -161,7 +161,7 @@ public:
mask_index += 4;
idx_stack[scale] |= idx_set_z_mask;
scale_stack.at(log2(OCT_DIM) - log2(dimension)).set(2);
scale_stack.at(static_cast<uint64_t>(log2(OCT_DIM) - log2(dimension))).set(2);
}
// Check to see if we are on a valid oct
@ -182,7 +182,7 @@ public:
// Count the number of non-leaf octs that come before and add it to the current parent stack position
int count = count_bits((uint8_t)(head >> 24) ^ count_mask_8[mask_index]);
int index = (parent_stack[parent_stack_position] & child_pointer_mask) + count;
int index = static_cast<int>((parent_stack[parent_stack_position] & child_pointer_mask) + count);
// Increment the parent stack position and put the new oct node as the parent
parent_stack_position++;

@ -5,6 +5,8 @@
#include <map>
#include <string.h>
#include "LightController.h"
#include "Old_Map.h"
#include "Camera.h"
#ifdef linux
#include <CL/cl.h>
@ -34,6 +36,9 @@ struct device {
char version[128];
cl_platform_id platform;
cl_uint comp_units;
char extensions[1024];
char name[256];
bool cl_gl_sharing = false;
};
struct PackedData;
@ -45,55 +50,93 @@ public:
virtual ~Hardware_Caster();
// Queries hardware, creates the command queue and context, and compiles kernel
int init() override;
// In interop mode, this will create a GL texture that we share
// Otherwise, it will create the pixel buffer and pass that in as an image, retrieving it each draw
// Both will create the view matrix, view res buffer
// Creates a texture to send to the GPU via height and width
// Creates a viewport vector array via vertical and horizontal fov
void create_viewport(int width, int height, float v_fov, float h_fov) override;
// Light controllers own the copy of the PackedData array.
// We receive a pointer to the array and USE_HOST_POINTER to map the memory to the GPU
void assign_lights(std::vector<PackedData> *data) override;
// We take a ptr to the map and create the map, and map_dimensions buffer for the GPU
void assign_map(Old_Map *map) override;
// We take a ptr to the camera and create a camera direction and position buffer
void assign_camera(Camera *camera) override;
void validate() override;
// TODO: Hoist this to the base class
// Creates 3 buffers relating to the texture atlas: texture_atlas, atlas_dim, and tile_dim
// With these on the GPU we can texture any quad with an atlas tile
void create_texture_atlas(sf::Texture *t, sf::Vector2i tile_dim);
// Check to make sure that the buffers have been initiated and set them as kernel args
void validate() override;
// draw will abstract the gl sharing and software rendering
// methods of retrieving the screen buffer
// Acquires the GL objects, runs the kernel, then releases the GL objects
void compute() override;
// Take the viewport sprite and draw it to the screen
void draw(sf::RenderWindow* window) override;
// ================================== DEBUG =======================================
// Re compile the kernel and revalidate the args
int debug_quick_recompile();
// Modify the viewport matrix
void test_edit_viewport(int width, int height, float v_fov, float h_fov);
private:
private:
// Iterate the devices available and choose the best one
// Also checks for the sharing extension
int acquire_platform_and_device();
// With respect to the individual platforms implementation of sharing
// create a shared cl_gl context
int create_shared_context();
// Using the context and the device create a command queue for them
int create_command_queue();
int check_cl_khr_gl_sharing();
// Buffer operations
// All of these functions create and store a buffer in a map with the key representing their name
// Create an image buffer from an SF texture. Access Type is the read/write specifier required by OpenCL
int create_image_buffer(std::string buffer_name, cl_uint size, sf::Texture* texture, cl_int access_type);
// Create a buffer with CL_MEM_READ_ONLY and CL_MEM_COPY_HOST_PTR
int create_buffer(std::string buffer_name, cl_uint size, void* data);
// Create a buffer with user defined data flags
int create_buffer(std::string buffer_name, cl_uint size, void* data, cl_mem_flags flags);
int store_buffer(cl_mem, std::string buffer_name);
// Store a cl_mem object in the buffer map <string:name, cl_mem:buffer>
int store_buffer(cl_mem buffer, std::string buffer_name);
// Using CL release the memory object and remove the KVP associated with the buffer name
int release_buffer(std::string buffer_name);
// Compile the kernel from either a full source string, or from a file when is_path=true and kernel_source is a valid path
int compile_kernel(std::string kernel_source, bool is_path, std::string kernel_name);
// Set the arg index for the specified kernel and buffer
int set_kernel_arg(std::string kernel_name, int index, std::string buffer_name);
// Run the kernel using a 1d work size
// TODO: Test 2d worksize
int run_kernel(std::string kernel_name, const int work_size);
// Run a test kernel that prints out the kernel args
void print_kernel_arguments();
// CL error code handler. ImGui overlaps the assert() function annoyingly so I had to rename it
bool vr_assert(int error_code, std::string function_name);
cl_device_id getDeviceID();
@ -102,11 +145,15 @@ private:
cl_kernel getKernel(std::string kernel_name);
cl_command_queue getCommandQueue();
// Our device data
cl_platform_id platform_id;
cl_device_id device_id;
// And state
cl_context context;
cl_command_queue command_queue;
// Containers holding the kernels and buffers
std::map<std::string, cl_kernel> kernel_map;
std::map<std::string, cl_mem> buffer_map;

@ -2,9 +2,9 @@
#include <SFML/System/Vector3.hpp>
#include <SFML/System/Vector2.hpp>
#include <Map.h>
#include "Old_Map.h"
#include "Camera.h"
class Old_Map;
class Camera;
struct PackedData;
class RayCaster {

@ -1,6 +1,8 @@
#pragma once
#include "raycaster/RayCaster.h"
#include <thread>
#include "Old_Map.h"
#include "Camera.h"
struct PackedData;

@ -34,7 +34,7 @@ public:
if (arr_pos == 200)
arr_pos = 0;
fps_array[arr_pos] = (1.0 / fps_average);
fps_array[arr_pos] = static_cast<float>(1.0 / fps_average);
arr_pos++;
ImGui::Begin("Performance");

@ -1,9 +1,4 @@
#include "LightController.h"
#include "LightHandle.h"
#include <numeric>
#include <SFML/System/Time.hpp>
LightController::LightController(std::shared_ptr<Hardware_Caster> raycaster) : packed_data_array(reserved_count), open_list(reserved_count) {

@ -93,7 +93,6 @@ int main() {
// ni.listen_for_clients(5000);
// ni.stop_listening_for_clients();
// =============================
// Map _map(sf::Vector3i(0, 0, 0));
// _map.generate_octree();
@ -109,12 +108,6 @@ int main() {
ImGui::SFML::Init(window);
window.resetGLStates();
/*GL_Testing t;
t.compile_shader("../shaders/passthrough.frag", GL_Testing::Shader_Type::FRAGMENT);
t.compile_shader("../shaders/passthrough.vert", GL_Testing::Shader_Type::VERTEX);
t.create_program();
t.create_buffers();*/
// Start up the raycaster
std::shared_ptr<Hardware_Caster> raycaster(new Hardware_Caster());
@ -155,33 +148,24 @@ int main() {
std::shared_ptr<LightHandle> handle(light_controller.create_light(prototype));
// Load in the spritesheet texture
sf::Texture spritesheet;
spritesheet.loadFromFile("../assets/textures/minecraft_tiles.png");
//spritesheet.getNativeHandle();
raycaster->create_texture_atlas(&spritesheet, sf::Vector2i(16, 16));
// Checks to see if proper data was uploaded, then sets the kernel args
// ALL DATA LOADING MUST BE FINISHED
raycaster->validate();
// ========== DEBUG ==========
fps_counter fps;
// ===========================
Input input_handler;
camera->subscribe_to_publisher(&input_handler, vr::Event::EventType::KeyHeld);
camera->subscribe_to_publisher(&input_handler, vr::Event::EventType::KeyPressed);
camera->subscribe_to_publisher(&input_handler, vr::Event::EventType::MouseMoved);
//handle->subscribe_to_publisher(&ni, vr::Event::EventType::JoystickMoved);
WindowHandler win_hand(&window);
win_hand.subscribe_to_publisher(&input_handler, vr::Event::EventType::Closed);
// 16.6 milliseconds (60FPS)
float step_size = 0.0166f;
double frame_time = 0.0,
elapsed_time = 0.0,
@ -192,6 +176,7 @@ int main() {
// The sfml imgui wrapper I'm using requires Update be called with sf::Time
// Might modify it to also accept seconds
sf::Clock sf_delta_clock;
fps_counter fps;
while (window.isOpen()) {

@ -16,10 +16,6 @@ int Hardware_Caster::init() {
if(vr_assert(error, "aquire_platform_and_device"))
return error;
error = check_cl_khr_gl_sharing();
if(vr_assert(error, "check_cl_khr_gl_sharing"))
return error;
error = create_shared_context();
if (vr_assert(error, "create_shared_context"))
return error;
@ -265,7 +261,7 @@ void Hardware_Caster::test_edit_viewport(int width, int height, float v_fov, flo
ray.y,
ray.z,
0
);
);
}
}
}
@ -294,9 +290,9 @@ int Hardware_Caster::acquire_platform_and_device() {
cl_uint deviceIdCount = 0;
error = clGetDeviceIDs(plt_buf[i], CL_DEVICE_TYPE_ALL, 0, nullptr, &deviceIdCount);
// Check to see if we even have opencl on this machine
// Check to see if we even have OpenCL on this machine
if (deviceIdCount == 0) {
std::cout << "There appears to be no platforms supporting opencl" << std::endl;
std::cout << "There appears to be no platforms supporting OpenCL" << std::endl;
return OPENCL_NOT_SUPPORTED;
}
@ -318,6 +314,40 @@ int Hardware_Caster::acquire_platform_and_device() {
clGetDeviceInfo(d.id, CL_DEVICE_TYPE, sizeof(cl_device_type), &d.type, NULL);
clGetDeviceInfo(d.id, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(cl_uint), &d.clock_frequency, NULL);
clGetDeviceInfo(d.id, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), &d.comp_units, NULL);
clGetDeviceInfo(d.id, CL_DEVICE_EXTENSIONS, 1024, &d.extensions, NULL);
clGetDeviceInfo(d.id, CL_DEVICE_NAME, 256, &d.name, NULL);
std::cout << "Device: " << q << std::endl;
std::cout << "Device Name : " << d.name << std::endl;
std::cout << "Platform ID : " << d.platform << std::endl;
std::cout << "Device Version : " << d.version << std::endl;
std::cout << "Device Type : ";
if (d.type == CL_DEVICE_TYPE_CPU)
std::cout << "CPU" << std::endl;
else if (d.type == CL_DEVICE_TYPE_GPU)
std::cout << "GPU" << std::endl;
else if (d.type == CL_DEVICE_TYPE_ACCELERATOR)
std::cout << "Accelerator" << std::endl;
std::cout << "Max clock frequency : " << d.clock_frequency << std::endl;
std::cout << "Max compute units : " << d.comp_units << std::endl;
std::cout << "cl_khr_gl_sharing supported: ";
if (std::string(d.extensions).find("cl_khr_gl_sharing") == std::string::npos &&
std::string(d.extensions).find("cl_APPLE_gl_sharing") == std::string::npos) {
std::cout << "False" << std::endl;
}
std::cout << "True" << std::endl;
d.cl_gl_sharing = true;
std::cout << "Extensions supported: " << std::endl;
std::cout << std::string(d.extensions) << std::endl;
std::cout << " ===================================================================================== " << std::endl;
plt_ids.at(d.platform).push_back(d);
}
@ -342,13 +372,22 @@ int Hardware_Caster::acquire_platform_and_device() {
// Upon success of a condition, set the current best device values
// If the current device is not a GPU and we are comparing it to a GPU
if (device.type == CL_DEVICE_TYPE_GPU && current_best_device.type != CL_DEVICE_TYPE_GPU) {
current_best_device = device;
}
else if (device.comp_units > current_best_device.comp_units) {
// Get the unit with the higher compute units
if (device.comp_units > current_best_device.comp_units) {
current_best_device = device;
}
else if (current_best_device.type != CL_DEVICE_TYPE_GPU && device.clock_frequency > current_best_device.clock_frequency) {
// If we are comparing CPU to CPU get the one with the best clock
if (current_best_device.type != CL_DEVICE_TYPE_GPU && device.clock_frequency > current_best_device.clock_frequency) {
current_best_device = device;
}
if (current_best_device.cl_gl_sharing == false && device.cl_gl_sharing == true) {
current_best_device = device;
}
}
@ -357,6 +396,15 @@ int Hardware_Caster::acquire_platform_and_device() {
platform_id = current_best_device.platform;
device_id = current_best_device.id;
std::cout << std::endl;
std::cout << "Selected Platform : " << platform_id << std::endl;
std::cout << "Selected Device : " << device_id << std::endl;
std::cout << "Selected Name : " << current_best_device.name << std::endl;
if (current_best_device.cl_gl_sharing == false) {
std::cout << "This device does not support the cl_khr_gl_sharing extension" << std::endl;
return RayCaster::SHARING_NOT_SUPPORTED;
}
return 1;
};
@ -431,24 +479,7 @@ int Hardware_Caster::create_command_queue() {
}
}
// Checks whether the currently selected device (device_id) lists an
// OpenCL/OpenGL sharing extension in its CL_DEVICE_EXTENSIONS string.
//
// Returns 1 when either cl_khr_gl_sharing or cl_APPLE_gl_sharing is present,
// otherwise RayCaster::SHARING_NOT_SUPPORTED.
int Hardware_Caster::check_cl_khr_gl_sharing() {

    // Zero-initialised stack buffer. This replaces a new[] allocation that was
    // released with scalar delete (undefined behaviour) and needs no cleanup
    // on any return path.
    char ext_str[1024] = {0};

    // Offer one byte less than the buffer holds so the final byte always stays
    // a terminator. If the query fails the buffer remains an empty string and
    // the device is reported as not supporting sharing.
    clGetDeviceInfo(device_id, CL_DEVICE_EXTENSIONS, sizeof(ext_str) - 1, ext_str, nullptr);

    const std::string extensions(ext_str);
    std::cout << extensions;

    if (extensions.find("cl_khr_gl_sharing") == std::string::npos &&
        extensions.find("cl_APPLE_gl_sharing") == std::string::npos) {
        std::cout << "No support for the cl_khr_gl_sharing extension";
        return RayCaster::SHARING_NOT_SUPPORTED;
    }

    return 1;
}
int Hardware_Caster::compile_kernel(std::string kernel_source, bool is_path, std::string kernel_name) {

Loading…
Cancel
Save