The MBP was having problems with out-of-bounds memory operations caused by the way the cam dir was handled: an sf::Vector3f was being passed to the kernel as a float3, and the fault showed up while accessing the Z (third) element. I'm assuming it was because of some weird backend behavior where odd-sized vector types ("gentypeOdds") actually occupy gentypeOdds + 1 elements (e.g. a float3 takes the space of a float4). Converted the write_imagef calls to write_imageui, though I don't think that really helps anything. Fixed the bottom half of the screen getting cut off. Fixed a view matrix import error. Fixed a problem the MBP had with negative values during device init; that was a weird one.

9 years ago · 10bc771807
parent 4e96985104
commit 10bc771807
4 changed files with 59 additions and 40 deletions
--- a/kernels/minimal_kernel.cl
+++ b/kernels/minimal_kernel.cl
@ -1,15 +1,15 @@


-float4 white_light(float4 input, float3 light, int3 mask) {
+uint4 white_light(uint4 input, float3 light, int3 mask) {

 	input.w = input.w + acos(
 		dot(
 			normalize(light),
 			normalize(fabs(convert_float3(mask)))
 			)
-		) / 2;
+		) * 50;

-	return input;
+	return (input);

 }

@ -25,8 +25,11 @@ __kernel void min_kern(
        __write_only image2d_t image
 ){

+
    size_t id = get_global_id(0);
    int2 pixel = {id % resolution->x, id / resolution->x};
+    //int2 pixel = {1, 1};
+
    float3 ray_dir = projection_matrix[pixel.x + resolution->x * pixel.y];

    ray_dir = (float3)(
@ -34,6 +37,22 @@ __kernel void min_kern(
            ray_dir.y,
            ray_dir.z * cos(cam_dir->y) - ray_dir.x * sin(cam_dir->y)
    );
+//
+//    float a = cam_dir->x;
+//    float b = cam_dir->y;
+//    float c = cam_dir->z;
+//
+//    ray_dir.x = ray_dir.z * sin(b) + ray_dir.x * cos(b);
+//    ray_dir.y = ray_dir.y;
+//    ray_dir.z = ray_dir.z * cos(b) - ray_dir.x * sin(b);
+//
+//
+//    float3 ray_dir2 = (float3)(
+//    ray_dir.x * cos(c) - ray_dir.y * sin(c),
+//    ray_dir.x * sin(c) + ray_dir.y * cos(c),
+//    ray_dir.z);
+//
+//    printf("%f, %f, %f", ray_dir2.x, ray_dir2.y, ray_dir2.z);

    ray_dir = (float3)(
          ray_dir.x * cos(cam_dir->z) - ray_dir.y * sin(cam_dir->z),
@ -83,11 +102,11 @@ __kernel void min_kern(
 		int3 undershoot = voxel > 0;

 		if (overshoot.x == 0 || overshoot.y == 0 || overshoot.z == 0 || undershoot.x == 0 || undershoot.y == 0){
-			write_imagef(image, pixel, (float4)(.73, .81, .89, 1.0));
+			write_imageui(image, pixel, (uint4)(50, 50, 50, 255));
 			return;
 		}
 		if (undershoot.z == 0) {
-			write_imagef(image, pixel, (float4)(.14, .30, .50, 1.0));
+			write_imageui(image, pixel, (uint4)(14, 30, 50, 255));
 			return;
 		}

@ -98,23 +117,23 @@ __kernel void min_kern(
 		if (voxel_data != 0) {
 			switch (voxel_data) {
 			case 1:
-				write_imagef(image, pixel, (float4)(.50, .00, .00, 1));
+				write_imageui(image, pixel, (uint4)(50, 0, 0, 255));
 				return;
 			case 2:
-				write_imagef(image, pixel, (float4)(.00, .50, .40, 1.00));
+				write_imageui(image, pixel, (uint4)(0, 50, 40, 255));
 				return;
 			case 3:
-				write_imagef(image, pixel, (float4)(.00, .00, .50, 1.00));
+				write_imageui(image, pixel, (uint4)(0, 0, 50, 255));
 				return;
 			case 4:
-				write_imagef(image, pixel, (float4)(.25, .00, .25, 1.00));
+				write_imageui(image, pixel, (uint4)(25, 0, 25, 255));
 				return;
 			case 5:
-				//write_imagef(image, pixel, (float4)(.25, .00, .25, 1.00));
-				write_imagef(image, pixel, white_light((float4)(.25, .32, .14, 0.2), (float3)(lights[7], lights[8], lights[9]), mask));
+				//write_imageui(image, pixel, (uint4)(200, 200, 200, 255));
+				write_imageui(image, pixel, white_light((uint4)(225, 232, 214, 100), (float3)(lights[7], lights[8], lights[9]), mask));
 				return;
 			case 6:
-				write_imagef(image, pixel, (float4)(.30, .80, .10, 1.00));
+				write_imageui(image, pixel, (uint4)(30, 80, 10, 255));
 				return;
 			}
 		}
@ -122,6 +141,6 @@ __kernel void min_kern(
        dist++;
    } while (dist < max_dist);

-    write_imagef(image, pixel, (float4)(.73, .81, .89, 1.0));
+    write_imageui(image, pixel, (uint4)(73, 81, 89, 255));
    return;
 }
--- a/src/CL_Wrapper.cpp
+++ b/src/CL_Wrapper.cpp
@ -59,8 +59,9 @@ int CL_Wrapper::acquire_platform_and_device(){
    // falling back to the cpu with the fastest clock if we weren't able to find one

    device current_best_device;
-	current_best_device.type = -1; // Set this to -1 so the first run always selects a new device
-
+	current_best_device.type = 0; // Set this to -1 so the first run always selects a new device
+    current_best_device.clock_frequency = 0;
+    current_best_device.comp_units = 0;

    for (auto kvp: plt_ids){

@ -86,7 +87,7 @@ int CL_Wrapper::acquire_platform_and_device(){
    platform_id = current_best_device.platform;
    device_id = current_best_device.id;

-    return 0;
+    return 1;
 };

 int CL_Wrapper::create_shared_context() {
@ -240,7 +241,7 @@ int CL_Wrapper::store_buffer(cl_mem buffer, std::string buffer_name){

 int CL_Wrapper::run_kernel(std::string kernel_name, const int work_size){

-    const int WORKER_SIZE = 10;
+    const int WORKER_SIZE = 1;
    size_t global_work_size[1] = { static_cast<size_t>(work_size) };

    cl_kernel kernel = kernel_map.at(kernel_name);
--- a/src/TestPlatform.cpp
+++ b/src/TestPlatform.cpp
@ -3,6 +3,8 @@
 #include <cstring>
 #include <iostream>
 #include <vector>
+#include <OpenCL/opencl.h>
+

 #ifdef linux

@ -14,7 +16,7 @@

 #elif defined TARGET_OS_MAC
 #include <OpenGL/OpenGL.h>
-# include <OpenCL/opencl.h>
+
 #endif


@ -52,7 +54,6 @@ int IsExtensionSupported(

 inline int test_for_gl_cl_sharing() {

-
    int err = 0;

 #if defined (__APPLE__) || defined(MACOSX)
--- a/src/main.cpp
+++ b/src/main.cpp
@ -25,15 +25,15 @@
 #include <OpenCL/cl_ext.h>

 #endif
-#include "TestPlatform.cpp"
 #include "Map.h"
 #include "Curses.h"
 #include "util.hpp"
 #include "RayCaster.h"
 #include "CL_Wrapper.h"

-const int WINDOW_X = 1000;
-const int WINDOW_Y = 1000;
+const int WINDOW_X = 200;
+const int WINDOW_Y = 200;
+const int WORK_SIZE = WINDOW_X * WINDOW_Y;

 const int MAP_X = 1024;
 const int MAP_Y = 1024;
@ -69,7 +69,6 @@ int main() {
 	sf::Texture t;

 	CL_Wrapper c;
-	query_platform_devices();
 	c.acquire_platform_and_device();
 	c.create_shared_context();
 	c.create_command_queue();
@ -140,19 +139,18 @@ int main() {
        }
    }
 	std::cout << "done\n";
-    int ind = 367;
-    printf("%i === %f, %f, %f\n", ind, view_matrix[ind * 4 + 0], view_matrix[ind * 4 + 1], view_matrix[ind * 4 + 2]);

    cl_mem view_matrix_buff = clCreateBuffer(
            c.getContext(), CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
-            sizeof(float) * 3 * view_res.x * view_res.y, view_matrix, NULL
+            sizeof(float) * 4 * view_res.x * view_res.y, view_matrix, NULL
    );

-    sf::Vector3f cam_dir(1.0f, 0.0f, 1.00f);
+    //sf::Vector3f cam_dir(1.0f, 0.0f, 1.00f);

+    float cam_dir[] = {1.0f, 0.0f, 1.57f, 0.0f};
    cl_mem cam_dir_buff = clCreateBuffer(
            c.getContext(), CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
-            sizeof(float) * 4, &cam_dir, NULL
+            sizeof(float) * 4, cam_dir, NULL
    );


@ -221,9 +219,10 @@ int main() {
 	c.set_kernel_arg("min_kern", 7, "light_count_buffer");
 	c.set_kernel_arg("min_kern", 8, "image_buffer");

-    const int size = WINDOW_X * WINDOW_Y;

-    s.setTexture(t);
+
+    s.setTexture(t, true);
+    s.setPosition(0, 0);

    // The step size in milliseconds between calls to Update()
    // Lets set it to 16.6 milliseonds (60FPS)
@ -300,7 +299,7 @@ int main() {
 			cam_vec.x = -1;
 		}
        if (sf::Keyboard::isKeyPressed(sf::Keyboard::Left)) {
-            cam_dir.z = -0.1f;
+            //cam_dir.z = -0.1f;
        }
        if (sf::Keyboard::isKeyPressed(sf::Keyboard::Right)) {
            cam_vec.z = +0.1f;
@ -318,15 +317,15 @@ int main() {

 				// Mouse movement
 				sf::Mouse::setPosition(fixed);
-				cam_dir.y -= deltas.y / 300.0f;
-				cam_dir.z -= deltas.x / 300.0f;
+				cam_dir[1] -= deltas.y / 300.0f;
+				cam_dir[2] -= deltas.x / 300.0f;
 			}
 		}
 		cam_pos.x += cam_vec.x / 1.0;
 		cam_pos.y += cam_vec.y / 1.0;
 		cam_pos.z += cam_vec.z / 1.0;

-		std::cout << cam_vec.x << " : " << cam_vec.y << " : " << cam_vec.z << std::endl;
+		//std::cout << cam_vec.x << " : " << cam_vec.y << " : " << cam_vec.z << std::endl;


        // Time keeping
@ -364,7 +363,7 @@ int main() {
 		error = clEnqueueAcquireGLObjects(c.getCommandQueue(), 1, &image_buff, 0, 0, 0);
 		if (c.assert(error, "clEnqueueAcquireGLObjects"))
 			return -1;
-		c.run_kernel("min_kern", size);
+		c.run_kernel("min_kern", WORK_SIZE);

 		clFinish(c.getCommandQueue());

@ -372,7 +371,6 @@ int main() {
 		if (c.assert(error, "clEnqueueReleaseGLObjects"))
 			return -1;

-        s.setPosition(0, 0);
        window.draw(s);
 		
 		fps.frame(delta_time);