Removed some of the C'ness, moving stuff into the kernel

9 years ago · c5d73bd5fd
parent 515c0767a9
commit c5d73bd5fd
4 changed files with 73 additions and 90 deletions
--- a/Conway_OpenCL/HelloWorld.cpp
+++ b/Conway_OpenCL/HelloWorld.cpp
@ -13,8 +13,6 @@
 #define SUCCESS 0
 #define FAILURE 1

-
-
 float elap_time() {
 	static __int64 start = 0;
 	static __int64 frequency = 0;
@ -71,77 +69,77 @@ int main(int argc, char* argv[])

 	// ============================== OpenCL Setup ==================================================================

-	/*Step1: Getting platforms and choose an available one.*/
-	cl_uint numPlatforms;	//the NO. of platforms
-	cl_platform_id platform = NULL;	//the chosen platform
-	cl_int	status = clGetPlatformIDs(0, NULL, &numPlatforms);
+	// Get the platforms
+	cl_uint numPlatforms;
+	cl_platform_id platform = NULL;
+	cl_int	status = clGetPlatformIDs(0, NULL, &numPlatforms); // Retrieve the number of platforms
 	if (status != CL_SUCCESS) {
 		std::cout << "Error: Getting platforms!" << std::endl;
 		return FAILURE;
 	}

 	 // Choose the first available platform
-	if(numPlatforms > 0)
-	{
-		cl_platform_id* platforms = (cl_platform_id* )malloc(numPlatforms* sizeof(cl_platform_id));
-		status = clGetPlatformIDs(numPlatforms, platforms, NULL);
+	if(numPlatforms > 0) {
+		cl_platform_id* platforms = new cl_platform_id[numPlatforms]; 
+		status = clGetPlatformIDs(numPlatforms, platforms, NULL);	// Now populate the array with the platforms
 		platform = platforms[0];
-		free(platforms);
+		delete platforms;
 	}

-	/*Step 2:Query the platform and choose the first GPU device if has one.Otherwise use the CPU as device.*/
-	cl_uint				numDevices = 0;
-	cl_device_id        *devices;
+	
+	cl_uint	numDevices = 0;
+	cl_device_id *devices;
 	status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices);	
 	if (numDevices == 0) { //no GPU available.
 		std::cout << "No GPU device available." << std::endl;
 		std::cout << "Choose CPU as default device." << std::endl;
 		status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 0, NULL, &numDevices);	
-		devices = (cl_device_id*)malloc(numDevices * sizeof(cl_device_id));
+		devices = new cl_device_id[numDevices];
 		status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, numDevices, devices, NULL);
 	}
 	else {
-		devices = (cl_device_id*)malloc(numDevices * sizeof(cl_device_id));
+		devices = new cl_device_id[numDevices];
 		status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numDevices, devices, NULL);
 	}
 	
-
-	/*Step 3: Create context.*/
 	cl_context context = clCreateContext(NULL,1, devices,NULL,NULL,NULL);
-	
-	/*Step 4: Creating command queue associate with the context.*/
 	cl_command_queue commandQueue = clCreateCommandQueue(context, devices[0], 0, NULL);


 	// ============================== Kernel Compilation, Setup ====================================================
-
-	/*Step 5: Create program object */
-	const char *filename = "HelloWorld_Kernel.cl";
+	
+	// Read the kernel from the file to a string
+	const char *filename = "conway_kernel.cl";
 	std::string sourceStr;
 	status = convertToString(filename, sourceStr);
+
+	// Create a program with the source
 	const char *source = sourceStr.c_str();
 	size_t sourceSize[] = {strlen(source)};
 	cl_program program = clCreateProgramWithSource(context, 1, &source, sourceSize, NULL);
 	
-	// Build program and set kernel
-	status=clBuildProgram(program, 1,devices,NULL,NULL,NULL);
+	// Build the program
+	status = clBuildProgram(program, 1,devices,NULL,NULL,NULL);

+	// If the build failed
 	if (status == CL_BUILD_PROGRAM_FAILURE) {
+
 		// Determine the size of the log
 		size_t log_size;
 		clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);

 		// Allocate memory for the log
-		char *log = (char *)malloc(log_size);
+		char *log = new char[log_size];

 		// Get the log
 		clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, log_size, log, NULL);

 		// Print the log
-		printf("%s\n", log);
+		std::cout << log << std::endl;
 	}

-	cl_kernel kernel = clCreateKernel(program, "helloworld", NULL);
+	// Now create the kernel
+	cl_kernel kernel = clCreateKernel(program, "conway", NULL);

 	// ======================================= Setup grid =========================================================

@ -149,26 +147,42 @@ int main(int argc, char* argv[])
 	std::mt19937 rng(time(NULL));
 	std::uniform_int_distribution<int> rgen(0, 12); // inclusive range 0..12, so `== 1` is a 1-in-13 (~7.7%) chance

-	// Init the grid 
-	char* grid = new char[GRID_WIDTH * GRID_HEIGHT* 2];
+	// Init the grids
+	unsigned char* front_grid = new unsigned char[GRID_WIDTH * GRID_HEIGHT* 2];

-	for (int i = 0; i < 1000 * 1000 * 2; i += 2) {
+	for (int i = 0; i < 1000 * 1000; i += 2) {
 		if (rgen(rng) == 1) {
-			grid[i] = 1;
-			grid[i + 1] = 1;
+			front_grid[i] = 1;
 		}
 		else {
-			grid[i] = 0;
-			grid[i + 1] = 0;
+			front_grid[i] = 0;
 		}
 	}

+	unsigned char* rear_grid = new unsigned char[GRID_WIDTH * GRID_HEIGHT * 2];
+
+	for (int i = 0; i < GRID_WIDTH * GRID_HEIGHT; i++) {
+		rear_grid[i] = front_grid[i];
+	}
+
 	// ====================================== Setup SFML ==========================================================

-	// Spites for drawing, probably where the biggest slowdown is
-	sf::RectangleShape live_node;
-	live_node.setFillColor(sf::Color(145, 181, 207));
-	live_node.setSize(sf::Vector2f(1, 1));
+	sf::Uint8* asdf = rear_grid;
+
+	sf::Uint8* pixel_array = new sf::Uint8[WINDOW_X * WINDOW_Y * 4];
+
+	for (int i = 0; i < GRID_WIDTH * GRID_HEIGHT * 2; i += 2) {
+
+		int p = i / 2;
+
+		pixel_array[p * 4] = 49; // R?
+		pixel_array[p * 4 + 1] = 68; // G?
+		pixel_array[p * 4 + 2] = 72; // B?
+		pixel_array[p * 4 + 3] = 255; // A?
+	}
+
+	char* arr = new char[1000 * 1000];
+

 	// Init window, and loop data
 	sf::RenderWindow window(sf::VideoMode(GRID_WIDTH, GRID_HEIGHT), "Classic Games");
@ -179,27 +193,18 @@ int main(int argc, char* argv[])


 	int err = 0;
-	cl_mem inputBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, GRID_WIDTH * GRID_HEIGHT * 2 * sizeof(char), (void*)grid, &err);
+	cl_mem frontBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, GRID_WIDTH * GRID_HEIGHT * sizeof(char), (void*)front_grid, &err);
+	cl_mem rearBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, GRID_WIDTH * GRID_HEIGHT * sizeof(char), (void*)rear_grid, &err);
+
 	cl_mem workerCountBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int), &WORKER_SIZE, &err);
 	cl_mem gridWidthBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int), &GRID_WIDTH, &err);
 	cl_mem gridHeightBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int), &GRID_HEIGHT, &err);

-	status = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&inputBuffer);
+	status = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&frontBuffer);
 	status = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&workerCountBuffer);
 	status = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&gridWidthBuffer);
 	status = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&gridHeightBuffer);

-	sf::Uint8* pixel_array = new sf::Uint8[WINDOW_X * WINDOW_Y * 4];
-
-	for (int i = 0; i < GRID_WIDTH * GRID_HEIGHT * 2; i += 2) {
-
-		int p = i / 2;
-
-		pixel_array[p * 4] = 49; // R?
-		pixel_array[p * 4 + 1] = 68; // G?
-		pixel_array[p * 4 + 2] = 72; // B?
-		pixel_array[p * 4 + 3] = 255; // A?
-	}

 	sf::Texture texture;
 	texture.create(WINDOW_X, WINDOW_Y);
@ -231,7 +236,7 @@ int main(int argc, char* argv[])
 		// ======================================= OpenCL Shtuff =============================================

 		// Update the data in GPU memory
-		status = clEnqueueWriteBuffer(commandQueue, inputBuffer, CL_TRUE, 0, GRID_WIDTH * GRID_HEIGHT * 2 * sizeof(char), (void*)grid, NULL, 0, NULL);
+		//status = clEnqueueWriteBuffer(commandQueue, frontBuffer, CL_TRUE, 0, GRID_WIDTH * GRID_HEIGHT * 2 * sizeof(char), (void*)grid, NULL, 0, NULL);
 		
 		// Work size, for each y line
 		size_t global_work_size[1] = { WORKER_SIZE };
@ -240,30 +245,8 @@ int main(int argc, char* argv[])
 		status = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);

 		// Get output, put back into grid
-		status = clEnqueueReadBuffer(commandQueue, inputBuffer, CL_TRUE, 0, GRID_WIDTH * GRID_HEIGHT * 2 * sizeof(char), (void*)grid, 0, NULL, NULL);
-
+		status = clEnqueueReadBuffer(commandQueue, frontBuffer, CL_TRUE, 0, GRID_WIDTH * GRID_HEIGHT * sizeof(char), (void*)rear_grid, 0, NULL, NULL);

-		for (int i = 0; i < GRID_WIDTH * GRID_HEIGHT * 2; i += 2) {
-			
-			int p = i / 2;
-
-			if (grid[i + 1] == 1) {
-
-				pixel_array[p * 4] = 255; // R?
-				pixel_array[p * 4 + 1] = 255; // G?
-				pixel_array[p * 4 + 2] = 255; // B?
-				pixel_array[p * 4 + 3] = 255; // A?
-
-			}
-			else if (grid[i] == 1){
-				pixel_array[p * 4] = 49; // R?
-				pixel_array[p * 4 + 1] = 68; // G?
-				pixel_array[p * 4 + 2] = 72; // B?
-				pixel_array[p * 4 + 3] = 255; // A?
-			}
-
-			grid[i] = grid[i + 1];
-		}

 		texture.update(pixel_array);
 		window.draw(sprite);
@ -275,7 +258,7 @@ int main(int argc, char* argv[])


 	// Temporary
-	status = clReleaseMemObject(inputBuffer);
+	status = clReleaseMemObject(frontBuffer);
 	status = clReleaseMemObject(workerCountBuffer);
 	status = clReleaseMemObject(gridWidthBuffer);
 	status = clReleaseMemObject(gridHeightBuffer);
--- a/Conway_OpenCL/Conway_OpenCL.vcxproj
+++ b/Conway_OpenCL/Conway_OpenCL.vcxproj
@ -117,7 +117,7 @@
      <ImportLibrary>$(SolutionDir)bin/x86/Debug/HelloWorld.lib</ImportLibrary>
    </Link>
    <PostBuildEvent>
-      <Command>copy HelloWorld_Kernel.cl "$(OutDir)HelloWorld_Kernel.cl" /Y
+      <Command>copy conway_kernel.cl "$(OutDir)conway_kernel.cl" /Y
 	  </Command>
    </PostBuildEvent>
  </ItemDefinitionGroup>
@ -144,7 +144,7 @@
      <AdditionalOptions> /machine:x64 /debug %(AdditionalOptions)</AdditionalOptions>
    </Link>
    <PostBuildEvent>
-      <Command>copy HelloWorld_Kernel.cl "$(OutDir)HelloWorld_Kernel.cl" /Y
+      <Command>copy conway_kernel.cl "$(OutDir)conway_kernel.cl" /Y
 	  </Command>
    </PostBuildEvent>
  </ItemDefinitionGroup>
@ -182,7 +182,7 @@
      <LinkLibraryDependencies>false</LinkLibraryDependencies>
    </ProjectReference>
    <PostBuildEvent>
-      <Command>copy HelloWorld_Kernel.cl "$(OutDir)HelloWorld_Kernel.cl" /Y
+      <Command>copy conway_kernel.cl "$(OutDir)conway_kernel.cl" /Y
 	  </Command>
    </PostBuildEvent>
  </ItemDefinitionGroup>
@ -219,15 +219,15 @@
      <AdditionalOptions> /machine:x64 %(AdditionalOptions)</AdditionalOptions>
    </Link>
    <PostBuildEvent>
-      <Command>copy HelloWorld_Kernel.cl "$(OutDir)HelloWorld_Kernel.cl" /Y
+      <Command>copy conway_kernel.cl "$(OutDir)conway_kernel.cl" /Y
 	  </Command>
    </PostBuildEvent>
  </ItemDefinitionGroup>
  <ItemGroup>
-    <ClCompile Include="HelloWorld.cpp" />
+    <ClCompile Include="Conway.cpp" />
  </ItemGroup>
  <ItemGroup>
-    <None Include="HelloWorld_Kernel.cl" />
+    <None Include="conway_kernel.cl" />
  </ItemGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
  <ImportGroup Label="ExtensionTargets">
--- a/Conway_OpenCL/HelloWorld_Kernel.cl
+++ b/Conway_OpenCL/HelloWorld_Kernel.cl
@ -1,4 +1,4 @@
-__kernel void helloworld(__global char* in, __global int* num_workers, __global int* grid_width, __global int* grid_height)
+__kernel void conway(__global unsigned char* front_grid, __global unsigned char* rear_grid, __global int* num_workers, __global int* grid_width, __global int* grid_height)
 {
 	// Calculate the start and end range that this worker will be calculating

--- a/Conway_OpenCL/~AutoRecover.Conway_OpenCL.vcxproj
+++ b/Conway_OpenCL/~AutoRecover.Conway_OpenCL.vcxproj
@ -117,7 +117,7 @@
      <ImportLibrary>$(SolutionDir)bin/x86/Debug/HelloWorld.lib</ImportLibrary>
    </Link>
    <PostBuildEvent>
-      <Command>copy HelloWorld_Kernel.cl "$(OutDir)HelloWorld_Kernel.cl" /Y
+      <Command>copy conway_kernel.cl "$(OutDir)conway_kernel.cl" /Y
 	  </Command>
    </PostBuildEvent>
  </ItemDefinitionGroup>
@ -144,7 +144,7 @@
      <AdditionalOptions> /machine:x64 /debug %(AdditionalOptions)</AdditionalOptions>
    </Link>
    <PostBuildEvent>
-      <Command>copy HelloWorld_Kernel.cl "$(OutDir)HelloWorld_Kernel.cl" /Y
+      <Command>copy conway_kernel.cl "$(OutDir)conway_kernel.cl" /Y
 	  </Command>
    </PostBuildEvent>
  </ItemDefinitionGroup>
@ -182,7 +182,7 @@
      <LinkLibraryDependencies>false</LinkLibraryDependencies>
    </ProjectReference>
    <PostBuildEvent>
-      <Command>copy HelloWorld_Kernel.cl "$(OutDir)HelloWorld_Kernel.cl" /Y
+      <Command>copy conway_kernel.cl "$(OutDir)conway_kernel.cl" /Y
 	  </Command>
    </PostBuildEvent>
  </ItemDefinitionGroup>
@ -219,15 +219,15 @@
      <AdditionalOptions> /machine:x64 %(AdditionalOptions)</AdditionalOptions>
    </Link>
    <PostBuildEvent>
-      <Command>copy HelloWorld_Kernel.cl "$(OutDir)HelloWorld_Kernel.cl" /Y
+      <Command>copy conway_kernel.cl "$(OutDir)conway_kernel.cl" /Y
 	  </Command>
    </PostBuildEvent>
  </ItemDefinitionGroup>
  <ItemGroup>
-    <ClCompile Include="HelloWorld.cpp" />
+    <ClCompile Include="Conway.cpp" />
  </ItemGroup>
  <ItemGroup>
-    <None Include="HelloWorld_Kernel.cl" />
+    <None Include="conway_kernel.cl" />
  </ItemGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
  <ImportGroup Label="ExtensionTargets">