diff --git a/Conway_OpenCL/HelloWorld.cpp b/Conway_OpenCL/Conway.cpp similarity index 71% rename from Conway_OpenCL/HelloWorld.cpp rename to Conway_OpenCL/Conway.cpp index fb1f68e..372f2cb 100644 --- a/Conway_OpenCL/HelloWorld.cpp +++ b/Conway_OpenCL/Conway.cpp @@ -13,8 +13,6 @@ #define SUCCESS 0 #define FAILURE 1 - - float elap_time() { static __int64 start = 0; static __int64 frequency = 0; @@ -71,77 +69,77 @@ int main(int argc, char* argv[]) // ============================== OpenCL Setup ================================================================== - /*Step1: Getting platforms and choose an available one.*/ - cl_uint numPlatforms; //the NO. of platforms - cl_platform_id platform = NULL; //the chosen platform - cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms); + // Get the platforms + cl_uint numPlatforms; + cl_platform_id platform = NULL; + cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms); // Retrieve the number of platforms if (status != CL_SUCCESS) { std::cout << "Error: Getting platforms!" << std::endl; return FAILURE; } // Choose the first available platform - if(numPlatforms > 0) - { - cl_platform_id* platforms = (cl_platform_id* )malloc(numPlatforms* sizeof(cl_platform_id)); - status = clGetPlatformIDs(numPlatforms, platforms, NULL); + if(numPlatforms > 0) { + cl_platform_id* platforms = new cl_platform_id[numPlatforms]; + status = clGetPlatformIDs(numPlatforms, platforms, NULL); // Now populate the array with the platforms platform = platforms[0]; - free(platforms); + delete platforms; } - /*Step 2:Query the platform and choose the first GPU device if has one.Otherwise use the CPU as device.*/ - cl_uint numDevices = 0; - cl_device_id *devices; + + cl_uint numDevices = 0; + cl_device_id *devices; status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices); if (numDevices == 0) { //no GPU available. std::cout << "No GPU device available." << std::endl; std::cout << "Choose CPU as default device." << std::endl; status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 0, NULL, &numDevices); - devices = (cl_device_id*)malloc(numDevices * sizeof(cl_device_id)); + devices = new cl_device_id[numDevices]; status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, numDevices, devices, NULL); } else { - devices = (cl_device_id*)malloc(numDevices * sizeof(cl_device_id)); + devices = new cl_device_id[numDevices]; status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numDevices, devices, NULL); } - - /*Step 3: Create context.*/ cl_context context = clCreateContext(NULL,1, devices,NULL,NULL,NULL); - - /*Step 4: Creating command queue associate with the context.*/ cl_command_queue commandQueue = clCreateCommandQueue(context, devices[0], 0, NULL); // ============================== Kernel Compilation, Setup ==================================================== - - /*Step 5: Create program object */ - const char *filename = "HelloWorld_Kernel.cl"; + + // Read the kernel from the file to a string + const char *filename = "conway_kernel.cl"; std::string sourceStr; status = convertToString(filename, sourceStr); + + // Create a program with the source const char *source = sourceStr.c_str(); size_t sourceSize[] = {strlen(source)}; cl_program program = clCreateProgramWithSource(context, 1, &source, sourceSize, NULL); - // Build program and set kernel - status=clBuildProgram(program, 1,devices,NULL,NULL,NULL); + // Build the program + status = clBuildProgram(program, 1,devices,NULL,NULL,NULL); + // If the build failed if (status == CL_BUILD_PROGRAM_FAILURE) { + // Determine the size of the log size_t log_size; clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size); // Allocate memory for the log - char *log = (char *)malloc(log_size); + char *log = new char[log_size]; // Get the log clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, log_size, log, NULL); // Print the log - printf("%s\n", log); + std::cout << log << std::endl; } - cl_kernel kernel = clCreateKernel(program, "helloworld", NULL); + // Now create the kernel + cl_kernel kernel = clCreateKernel(program, "conway", NULL); // ======================================= Setup grid ========================================================= @@ -149,26 +147,42 @@ int main(int argc, char* argv[]) std::mt19937 rng(time(NULL)); std::uniform_int_distribution rgen(0, 12); // 25% chance - // Init the grid - char* grid = new char[GRID_WIDTH * GRID_HEIGHT* 2]; + // Init the grids + unsigned char* front_grid = new unsigned char[GRID_WIDTH * GRID_HEIGHT* 2]; - for (int i = 0; i < 1000 * 1000 * 2; i += 2) { + for (int i = 0; i < 1000 * 1000; i += 2) { if (rgen(rng) == 1) { - grid[i] = 1; - grid[i + 1] = 1; + front_grid[i] = 1; } else { - grid[i] = 0; - grid[i + 1] = 0; + front_grid[i] = 0; } } + unsigned char* rear_grid = new unsigned char[GRID_WIDTH * GRID_HEIGHT * 2]; + + for (int i = 0; i < GRID_WIDTH * GRID_HEIGHT; i++) { + rear_grid[i] = front_grid[i]; + } + // ====================================== Setup SFML ========================================================== - // Spites for drawing, probably where the biggest slowdown is - sf::RectangleShape live_node; - live_node.setFillColor(sf::Color(145, 181, 207)); - live_node.setSize(sf::Vector2f(1, 1)); + sf::Uint8* asdf = rear_grid; + + sf::Uint8* pixel_array = new sf::Uint8[WINDOW_X * WINDOW_Y * 4]; + + for (int i = 0; i < GRID_WIDTH * GRID_HEIGHT * 2; i += 2) { + + int p = i / 2; + + pixel_array[p * 4] = 49; // R? + pixel_array[p * 4 + 1] = 68; // G? + pixel_array[p * 4 + 2] = 72; // B? + pixel_array[p * 4 + 3] = 255; // A? + } + + char* arr = new char[1000 * 1000]; + // Init window, and loop data sf::RenderWindow window(sf::VideoMode(GRID_WIDTH, GRID_HEIGHT), "Classic Games"); @@ -179,27 +193,18 @@ int main(int argc, char* argv[]) int err = 0; - cl_mem inputBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, GRID_WIDTH * GRID_HEIGHT * 2 * sizeof(char), (void*)grid, &err); + cl_mem frontBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, GRID_WIDTH * GRID_HEIGHT * sizeof(char), (void*)front_grid, &err); + cl_mem rearBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, GRID_WIDTH * GRID_HEIGHT * sizeof(char), (void*)rear_grid, &err); + cl_mem workerCountBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int), &WORKER_SIZE, &err); cl_mem gridWidthBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int), &GRID_WIDTH, &err); cl_mem gridHeightBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int), &GRID_HEIGHT, &err); - status = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&inputBuffer); + status = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&frontBuffer); status = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&workerCountBuffer); status = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&gridWidthBuffer); status = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&gridHeightBuffer); - sf::Uint8* pixel_array = new sf::Uint8[WINDOW_X * WINDOW_Y * 4]; - - for (int i = 0; i < GRID_WIDTH * GRID_HEIGHT * 2; i += 2) { - - int p = i / 2; - - pixel_array[p * 4] = 49; // R? - pixel_array[p * 4 + 1] = 68; // G? - pixel_array[p * 4 + 2] = 72; // B? - pixel_array[p * 4 + 3] = 255; // A? - } sf::Texture texture; texture.create(WINDOW_X, WINDOW_Y); @@ -231,7 +236,7 @@ int main(int argc, char* argv[]) // ======================================= OpenCL Shtuff ============================================= // Update the data in GPU memory - status = clEnqueueWriteBuffer(commandQueue, inputBuffer, CL_TRUE, 0, GRID_WIDTH * GRID_HEIGHT * 2 * sizeof(char), (void*)grid, NULL, 0, NULL); + //status = clEnqueueWriteBuffer(commandQueue, frontBuffer, CL_TRUE, 0, GRID_WIDTH * GRID_HEIGHT * 2 * sizeof(char), (void*)grid, NULL, 0, NULL); // Work size, for each y line size_t global_work_size[1] = { WORKER_SIZE }; @@ -240,30 +245,8 @@ int main(int argc, char* argv[]) status = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL); // Get output, put back into grid - status = clEnqueueReadBuffer(commandQueue, inputBuffer, CL_TRUE, 0, GRID_WIDTH * GRID_HEIGHT * 2 * sizeof(char), (void*)grid, 0, NULL, NULL); - + status = clEnqueueReadBuffer(commandQueue, frontBuffer, CL_TRUE, 0, GRID_WIDTH * GRID_HEIGHT * sizeof(char), (void*)rear_grid, 0, NULL, NULL); - for (int i = 0; i < GRID_WIDTH * GRID_HEIGHT * 2; i += 2) { - - int p = i / 2; - - if (grid[i + 1] == 1) { - - pixel_array[p * 4] = 255; // R? - pixel_array[p * 4 + 1] = 255; // G? - pixel_array[p * 4 + 2] = 255; // B? - pixel_array[p * 4 + 3] = 255; // A? - - } - else if (grid[i] == 1){ - pixel_array[p * 4] = 49; // R? - pixel_array[p * 4 + 1] = 68; // G? - pixel_array[p * 4 + 2] = 72; // B? - pixel_array[p * 4 + 3] = 255; // A? - } - - grid[i] = grid[i + 1]; - } texture.update(pixel_array); window.draw(sprite); @@ -275,7 +258,7 @@ int main(int argc, char* argv[]) // Temporary - status = clReleaseMemObject(inputBuffer); + status = clReleaseMemObject(frontBuffer); status = clReleaseMemObject(workerCountBuffer); status = clReleaseMemObject(gridWidthBuffer); status = clReleaseMemObject(gridHeightBuffer); diff --git a/Conway_OpenCL/Conway_OpenCL.vcxproj b/Conway_OpenCL/Conway_OpenCL.vcxproj index 72c54a7..32f38e0 100644 --- a/Conway_OpenCL/Conway_OpenCL.vcxproj +++ b/Conway_OpenCL/Conway_OpenCL.vcxproj @@ -117,7 +117,7 @@ $(SolutionDir)bin/x86/Debug/HelloWorld.lib - copy HelloWorld_Kernel.cl "$(OutDir)HelloWorld_Kernel.cl" /Y + copy conway_kernel.cl "$(OutDir)conway_kernel.cl" /Y @@ -144,7 +144,7 @@ /machine:x64 /debug %(AdditionalOptions) - copy HelloWorld_Kernel.cl "$(OutDir)HelloWorld_Kernel.cl" /Y + copy conway_kernel.cl "$(OutDir)conway_kernel.cl" /Y @@ -182,7 +182,7 @@ false - copy HelloWorld_Kernel.cl "$(OutDir)HelloWorld_Kernel.cl" /Y + copy conway_kernel.cl "$(OutDir)conway_kernel.cl" /Y @@ -219,15 +219,15 @@ /machine:x64 %(AdditionalOptions) - copy HelloWorld_Kernel.cl "$(OutDir)HelloWorld_Kernel.cl" /Y + copy conway_kernel.cl "$(OutDir)conway_kernel.cl" /Y - + - + diff --git a/Conway_OpenCL/HelloWorld_Kernel.cl b/Conway_OpenCL/conway_kernel.cl similarity index 85% rename from Conway_OpenCL/HelloWorld_Kernel.cl rename to Conway_OpenCL/conway_kernel.cl index c3a884f..4619ff3 100644 --- a/Conway_OpenCL/HelloWorld_Kernel.cl +++ b/Conway_OpenCL/conway_kernel.cl @@ -1,4 +1,4 @@ -__kernel void helloworld(__global char* in, __global int* num_workers, __global int* grid_width, __global int* grid_height) +__kernel void conway(__global unsigned char* front_grid, __global unsigned char* rear_grid, __global int* num_workers, __global int* grid_width, __global int* grid_height) { // Caclulate the start and end range that this worker will be calculating diff --git a/Conway_OpenCL/~AutoRecover.Conway_OpenCL.vcxproj b/Conway_OpenCL/~AutoRecover.Conway_OpenCL.vcxproj index 72c54a7..32f38e0 100644 --- a/Conway_OpenCL/~AutoRecover.Conway_OpenCL.vcxproj +++ b/Conway_OpenCL/~AutoRecover.Conway_OpenCL.vcxproj @@ -117,7 +117,7 @@ $(SolutionDir)bin/x86/Debug/HelloWorld.lib - copy HelloWorld_Kernel.cl "$(OutDir)HelloWorld_Kernel.cl" /Y + copy conway_kernel.cl "$(OutDir)conway_kernel.cl" /Y @@ -144,7 +144,7 @@ /machine:x64 /debug %(AdditionalOptions) - copy HelloWorld_Kernel.cl "$(OutDir)HelloWorld_Kernel.cl" /Y + copy conway_kernel.cl "$(OutDir)conway_kernel.cl" /Y @@ -182,7 +182,7 @@ false - copy HelloWorld_Kernel.cl "$(OutDir)HelloWorld_Kernel.cl" /Y + copy conway_kernel.cl "$(OutDir)conway_kernel.cl" /Y @@ -219,15 +219,15 @@ /machine:x64 %(AdditionalOptions) - copy HelloWorld_Kernel.cl "$(OutDir)HelloWorld_Kernel.cl" /Y + copy conway_kernel.cl "$(OutDir)conway_kernel.cl" /Y - + - +