Refactored and commented the Hardware Caster. Cleaned up many small things

8 years ago · 3aaffce566
parent 50c6d68944
commit 3aaffce566
9 changed files with 131 additions and 70 deletions
--- a/include/LightController.h
+++ b/include/LightController.h
@ -1,11 +1,12 @@
 #pragma once
 #include <SFML/System/Vector3.hpp>
-#include <SFML/System/Vector2.hpp> 
+#include <list>
+#include <numeric>
 #include "util.hpp"
 #include "Pub_Sub.h"
 #include "raycaster/RayCaster.h"
-#include <list>
 #include "raycaster/Hardware_Caster.h"
+#include "LightHandle.h"

 struct LightPrototype {
 	
--- a/include/Map.h
+++ b/include/Map.h
@ -8,12 +8,12 @@
 #include <iostream>
 #include <functional>
 #include <cmath>
-#include "util.hpp"
 #include <deque>
 #include <unordered_map>
 #include <bitset>
 #include <cstring>
 #include <queue>
+#include "util.hpp"

 #define _USE_MATH_DEFINES
 #include <math.h>
@ -115,7 +115,7 @@ public:
 		uint8_t idx_stack[32] = {0};

 		// Init the idx stack
-		std::vector<std::bitset<3>> scale_stack(log2(OCT_DIM));
+		std::vector<std::bitset<3>> scale_stack(static_cast<uint64_t>(log2(OCT_DIM)));

 		// Set our initial dimension and the position we use to keep track what oct were in
 		int dimension = OCT_DIM;
@ -142,7 +142,7 @@ public:
 				idx_stack[scale] |= idx_set_x_mask;

 				// Debug
-				scale_stack.at(log2(OCT_DIM) - log2(dimension)).set(0);
+				scale_stack.at(static_cast<uint64_t>(log2(OCT_DIM) - log2(dimension))).set(0);
 				
 			}
 			if (position.y >= (dimension / 2) + quad_position.y) {
@ -152,7 +152,7 @@ public:
 				mask_index += 2;

 				idx_stack[scale] ^= idx_set_y_mask;
-				scale_stack.at(log2(OCT_DIM) - log2(dimension)).set(1);
+				scale_stack.at(static_cast<uint64_t>(log2(OCT_DIM) - log2(dimension))).set(1);
 			}
 			if (position.z >= (dimension / 2) + quad_position.z) {

@ -161,7 +161,7 @@ public:
 				mask_index += 4;

 				idx_stack[scale] |= idx_set_z_mask;
-				scale_stack.at(log2(OCT_DIM) - log2(dimension)).set(2);
+				scale_stack.at(static_cast<uint64_t>(log2(OCT_DIM) - log2(dimension))).set(2);
 			}

 			// Check to see if we are on a valid oct
@ -182,7 +182,7 @@ public:
 				
 				// Count the number of non-leaf octs that come before and add it to the current parent stack position
 				int count = count_bits((uint8_t)(head >> 24) ^ count_mask_8[mask_index]);
-				int index = (parent_stack[parent_stack_position] & child_pointer_mask) + count;
+				int index = static_cast<int>((parent_stack[parent_stack_position] & child_pointer_mask) + count);
 				
 				// Increment the parent stack position and put the new oct node as the parent
 				parent_stack_position++;
--- a/include/raycaster/Hardware_Caster.h
+++ b/include/raycaster/Hardware_Caster.h
@ -5,6 +5,8 @@
 #include <map>
 #include <string.h>
 #include "LightController.h"
+#include "Old_Map.h"
+#include "Camera.h"

 #ifdef linux
 #include <CL/cl.h>
@ -34,6 +36,9 @@ struct device {
 	char version[128];
 	cl_platform_id platform;
 	cl_uint comp_units;
+	char extensions[1024];
+	char name[256];
+	bool cl_gl_sharing = false;
 };

 struct PackedData;
@ -45,55 +50,93 @@ public:

 	virtual ~Hardware_Caster();

+	
+	// Queries hardware, creates the command queue and context, and compiles kernel
 	int init() override;

-	// In interop mode, this will create a GL texture that we share
-	// Otherwise, it will create the pixel buffer and pass that in as an image, retrieving it each draw
-	// Both will create the view matrix, view res buffer
+	// Creates a texture to send to the GPU via height and width
+	// Creates a viewport vector array via vertical and horizontal fov
 	void create_viewport(int width, int height, float v_fov, float h_fov) override;
 	
+	// Light controllers own the copy of the PackedData array.
+	// We receive a pointer to the array and use CL_MEM_USE_HOST_PTR to map the memory to the GPU
 	void assign_lights(std::vector<PackedData> *data) override;
+
+	// We take a ptr to the map and create the map, and map_dimensions buffer for the GPU
 	void assign_map(Old_Map *map) override;
+
+	// We take a ptr to the camera and create a camera direction and position buffer
 	void assign_camera(Camera *camera) override;
-	void validate() override;

 	// TODO: Hoist this to the base class
+	// Creates 3 buffers relating to the texture atlas: texture_atlas, atlas_dim, and tile_dim
+	// With these on the GPU we can texture any quad with an atlas tile
 	void create_texture_atlas(sf::Texture *t, sf::Vector2i tile_dim);
+	
+	// Check to make sure that the buffers have been initialized and set them as kernel args
+	void validate() override;

-
-	// draw will abstract the gl sharing and software rendering
-	// methods of retrieving the screen buffer
+	// Acquires the GL objects, runs the kernel, then releases the GL objects
 	void compute() override;
+
+	// Take the viewport sprite and draw it to the screen
 	void draw(sf::RenderWindow* window) override;


+	// ================================== DEBUG =======================================
+	
+	// Recompile the kernel and revalidate the args
 	int debug_quick_recompile();
+
+	// Modify the viewport matrix
 	void test_edit_viewport(int width, int height, float v_fov, float h_fov);
-private:


+private:
+
+	// Iterate the devices available and choose the best one
+	// Also checks for the sharing extension
 	int acquire_platform_and_device();

+	// Following the individual platform's implementation of sharing,
+	// create a shared cl_gl context
 	int create_shared_context();

+	// Using the context and the device create a command queue for them
 	int create_command_queue();

-	int check_cl_khr_gl_sharing();
-
+	// Buffer operations
+	// All of these functions create and store a buffer in a map with the key representing their name
+	
+	// Create an image buffer from an SF texture. Access Type is the read/write specifier required by OpenCL
 	int create_image_buffer(std::string buffer_name, cl_uint size, sf::Texture* texture, cl_int access_type);
+
+	// Create a buffer with CL_MEM_READ_ONLY and CL_MEM_COPY_HOST_PTR
 	int create_buffer(std::string buffer_name, cl_uint size, void* data);
+
+	// Create a buffer with user defined data flags
 	int create_buffer(std::string buffer_name, cl_uint size, void* data, cl_mem_flags flags);
-	int store_buffer(cl_mem, std::string buffer_name);
+	
+	// Store a cl_mem object in the buffer map <string:name, cl_mem:buffer>
+	int store_buffer(cl_mem buffer, std::string buffer_name);
+
+	// Using CL release the memory object and remove the KVP associated with the buffer name
 	int release_buffer(std::string buffer_name);
 	
+	// Compile the kernel from kernel_source: either the full source string, or a valid path to it when is_path = true
 	int compile_kernel(std::string kernel_source, bool is_path, std::string kernel_name);

+	// Set the arg index for the specified kernel and buffer
 	int set_kernel_arg(std::string kernel_name, int index, std::string buffer_name);

+	// Run the kernel using a 1d work size
+	// TODO: Test 2d worksize
 	int run_kernel(std::string kernel_name, const int work_size);

+	// Run a test kernel that prints out the kernel args
 	void print_kernel_arguments();

+	// CL error code handler. ImGui overlaps the assert() function annoyingly so I had to rename it
 	bool vr_assert(int error_code, std::string function_name);

 	cl_device_id getDeviceID();
@ -102,11 +145,15 @@ private:
 	cl_kernel getKernel(std::string kernel_name);
 	cl_command_queue getCommandQueue();

+	// Our device data
 	cl_platform_id platform_id;
 	cl_device_id device_id;
+
+	// And state
 	cl_context context;
 	cl_command_queue command_queue;

+	// Containers holding the kernels and buffers
 	std::map<std::string, cl_kernel> kernel_map;
 	std::map<std::string, cl_mem> buffer_map;

--- a/include/raycaster/RayCaster.h
+++ b/include/raycaster/RayCaster.h
@ -2,9 +2,9 @@
 #include <SFML/System/Vector3.hpp>
 #include <SFML/System/Vector2.hpp>
 #include <Map.h>
-#include "Old_Map.h"
-#include "Camera.h"

+class Old_Map;
+class Camera;
 struct PackedData;

 class RayCaster {
--- a/include/raycaster/Software_Caster.h
+++ b/include/raycaster/Software_Caster.h
@ -1,6 +1,8 @@
 #pragma once
 #include "raycaster/RayCaster.h"
 #include <thread>
+#include "Old_Map.h"
+#include "Camera.h"

 struct PackedData;

--- a/include/util.hpp
+++ b/include/util.hpp
@ -34,7 +34,7 @@ public:
 		if (arr_pos == 200)
 			arr_pos = 0;

-		fps_array[arr_pos] = (1.0 / fps_average);
+		fps_array[arr_pos] = static_cast<float>(1.0 / fps_average);
 		arr_pos++;

 		ImGui::Begin("Performance");
--- a/src/LightController.cpp
+++ b/src/LightController.cpp
@ -1,9 +1,4 @@
 #include "LightController.h"
-#include "LightHandle.h"
-#include <numeric>
-#include <SFML/System/Time.hpp>
-
-

 LightController::LightController(std::shared_ptr<Hardware_Caster> raycaster) : packed_data_array(reserved_count), open_list(reserved_count) {

--- a/src/main.cpp
+++ b/src/main.cpp
@ -93,7 +93,6 @@ int main() {
 	// ni.listen_for_clients(5000);
 	// ni.stop_listening_for_clients();

-
 	// =============================
 	// Map _map(sf::Vector3i(0, 0, 0));
 	// _map.generate_octree();
@ -109,12 +108,6 @@ int main() {
 	ImGui::SFML::Init(window);
 	window.resetGLStates();

-	/*GL_Testing t;
-	t.compile_shader("../shaders/passthrough.frag", GL_Testing::Shader_Type::FRAGMENT);
-	t.compile_shader("../shaders/passthrough.vert", GL_Testing::Shader_Type::VERTEX);
-	t.create_program();
-	t.create_buffers();*/
-
 	// Start up the raycaster
 	std::shared_ptr<Hardware_Caster> raycaster(new Hardware_Caster());
 	
@ -155,33 +148,24 @@ int main() {
 	
 	std::shared_ptr<LightHandle> handle(light_controller.create_light(prototype));

-
 	// Load in the spritesheet texture
 	sf::Texture spritesheet;
 	spritesheet.loadFromFile("../assets/textures/minecraft_tiles.png");
-	//spritesheet.getNativeHandle();
 	raycaster->create_texture_atlas(&spritesheet, sf::Vector2i(16, 16));

-
 	// Checks to see if proper data was uploaded, then sets the kernel args
 	// ALL DATA LOADING MUST BE FINISHED
 	raycaster->validate();

-	// ========== DEBUG ==========
-    fps_counter fps;
-	// ===========================
-
 	Input input_handler;
-
 	camera->subscribe_to_publisher(&input_handler, vr::Event::EventType::KeyHeld);
 	camera->subscribe_to_publisher(&input_handler, vr::Event::EventType::KeyPressed);
 	camera->subscribe_to_publisher(&input_handler, vr::Event::EventType::MouseMoved);
-	//handle->subscribe_to_publisher(&ni, vr::Event::EventType::JoystickMoved);

 	WindowHandler win_hand(&window);
 	win_hand.subscribe_to_publisher(&input_handler, vr::Event::EventType::Closed);

-	// 16.6 milliseconds (60FPS)
+
 	float step_size = 0.0166f;
 	double  frame_time = 0.0,
 		elapsed_time = 0.0,
@ -192,6 +176,7 @@ int main() {
 	// The sfml imgui wrapper I'm using requires Update be called with sf::Time
 	// Might modify it to also accept seconds
 	sf::Clock sf_delta_clock;
+	fps_counter fps;

 	while (window.isOpen()) {

--- a/src/raycaster/Hardware_Caster.cpp
+++ b/src/raycaster/Hardware_Caster.cpp
@ -16,10 +16,6 @@ int Hardware_Caster::init() {
 	if(vr_assert(error, "aquire_platform_and_device"))
 		return error;

-	error = check_cl_khr_gl_sharing();
-	if(vr_assert(error, "check_cl_khr_gl_sharing"))
-		return error;
-
 	error = create_shared_context();
 	if (vr_assert(error, "create_shared_context"))
 		return error;
@ -265,7 +261,7 @@ void Hardware_Caster::test_edit_viewport(int width, int height, float v_fov, flo
 				ray.y,
 				ray.z,
 				0
-				);
+			);
 		}
 	}
 }
@ -294,9 +290,9 @@ int Hardware_Caster::acquire_platform_and_device() {
 		cl_uint deviceIdCount = 0;
 		error = clGetDeviceIDs(plt_buf[i], CL_DEVICE_TYPE_ALL, 0, nullptr, &deviceIdCount);

-		// Check to see if we even have opencl on this machine
+		// Check to see if we even have OpenCL on this machine
 		if (deviceIdCount == 0) {
-			std::cout << "There appears to be no platforms supporting opencl" << std::endl;
+			std::cout << "There appears to be no platforms supporting OpenCL" << std::endl;
 			return OPENCL_NOT_SUPPORTED;
 		}

@ -318,6 +314,40 @@ int Hardware_Caster::acquire_platform_and_device() {
 			clGetDeviceInfo(d.id, CL_DEVICE_TYPE, sizeof(cl_device_type), &d.type, NULL);
 			clGetDeviceInfo(d.id, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(cl_uint), &d.clock_frequency, NULL);
 			clGetDeviceInfo(d.id, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), &d.comp_units, NULL);
+			clGetDeviceInfo(d.id, CL_DEVICE_EXTENSIONS, 1024, &d.extensions, NULL);
+			clGetDeviceInfo(d.id, CL_DEVICE_NAME, 256, &d.name, NULL);
+
+			std::cout << "Device: " << q << std::endl;
+			std::cout << "Device Name : " << d.name << std::endl;
+			
+			std::cout << "Platform ID    : " << d.platform << std::endl;
+			std::cout << "Device Version : " << d.version << std::endl;
+
+			std::cout << "Device Type    : ";
+			if (d.type == CL_DEVICE_TYPE_CPU)
+				std::cout << "CPU" << std::endl;
+
+			else if (d.type == CL_DEVICE_TYPE_GPU)
+				std::cout << "GPU" << std::endl;
+
+			else if (d.type == CL_DEVICE_TYPE_ACCELERATOR)
+				std::cout << "Accelerator" << std::endl;
+
+			std::cout << "Max clock frequency : " << d.clock_frequency << std::endl;
+			std::cout << "Max compute units   : " << d.comp_units << std::endl;
+
+			// Sharing is supported only when the extension string advertises the Khronos or the Apple variant
+			const std::string extension_list(d.extensions);
+			d.cl_gl_sharing = extension_list.find("cl_khr_gl_sharing") != std::string::npos ||
+				extension_list.find("cl_APPLE_gl_sharing") != std::string::npos;
+
+			std::cout << "cl_khr_gl_sharing supported: ";
+			std::cout << (d.cl_gl_sharing ? "True" : "False") << std::endl;
+
+			std::cout << "Extensions supported: " << std::endl;
+			std::cout << std::string(d.extensions) << std::endl;
+
+			std::cout << " ===================================================================================== " << std::endl;

 			plt_ids.at(d.platform).push_back(d);
 		}
@ -342,13 +372,22 @@ int Hardware_Caster::acquire_platform_and_device() {

 			// Upon success of a condition, set the current best device values

+			// If the current best device is not a GPU and the candidate device is a GPU, prefer the candidate
 			if (device.type == CL_DEVICE_TYPE_GPU && current_best_device.type != CL_DEVICE_TYPE_GPU) {
 				current_best_device = device;
 			}
-			else if (device.comp_units > current_best_device.comp_units) {
+
+			// Get the unit with the higher compute units
+			if (device.comp_units > current_best_device.comp_units) {
 				current_best_device = device;
 			}
-			else if (current_best_device.type != CL_DEVICE_TYPE_GPU && device.clock_frequency > current_best_device.clock_frequency) {
+
+			// If we are comparing CPU to CPU get the one with the best clock
+			if (current_best_device.type != CL_DEVICE_TYPE_GPU && device.clock_frequency > current_best_device.clock_frequency) {
+				current_best_device = device;
+			}
+
+			if (current_best_device.cl_gl_sharing == false && device.cl_gl_sharing == true) {
 				current_best_device = device;
 			}
 		}
@ -357,6 +396,15 @@ int Hardware_Caster::acquire_platform_and_device() {
 	platform_id = current_best_device.platform;
 	device_id = current_best_device.id;

+	std::cout << std::endl;
+	std::cout << "Selected Platform : " << platform_id << std::endl;
+	std::cout << "Selected Device   : " << device_id << std::endl;
+	std::cout << "Selected Name     : " << current_best_device.name << std::endl;
+	
+	if (current_best_device.cl_gl_sharing == false) {
+		std::cout << "This device does not support the cl_khr_gl_sharing extension" << std::endl;
+		return RayCaster::SHARING_NOT_SUPPORTED;
+	}
 	return 1;
 };

@ -431,24 +479,7 @@ int Hardware_Caster::create_command_queue() {
 	}
 }

-int Hardware_Caster::check_cl_khr_gl_sharing() {

-	// Test for sharing
-	size_t ext_str_size = 1024;
-	char *ext_str = new char[ext_str_size];
-	clGetDeviceInfo(device_id, CL_DEVICE_EXTENSIONS, ext_str_size, ext_str, &ext_str_size);
-
-	std::cout << std::string(ext_str);
-	if (std::string(ext_str).find("cl_khr_gl_sharing") == std::string::npos &&
-      	    std::string(ext_str).find("cl_APPLE_gl_sharing") == std::string::npos) {
-		std::cout << "No support for the cl_khr_gl_sharing extension";
-		delete ext_str;
-		return RayCaster::SHARING_NOT_SUPPORTED;
-	}
-
-	delete ext_str;
-	return 1;
-}

 int Hardware_Caster::compile_kernel(std::string kernel_source, bool is_path, std::string kernel_name) {