parent
73651afb9c
commit
5c593695e8
@ -0,0 +1,250 @@
|
|||||||
|
#include <CL/cl.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <iostream>
|
||||||
|
#include <string>
|
||||||
|
#include <fstream>
|
||||||
|
#include <random>
|
||||||
|
#include <ctime>
|
||||||
|
#include <SFML/Graphics.hpp>
|
||||||
|
|
||||||
|
#define SUCCESS 0
|
||||||
|
#define FAILURE 1
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
/* convert the kernel file into a string */
|
||||||
|
int convertToString(const char *filename, std::string& s)
|
||||||
|
{
|
||||||
|
size_t size;
|
||||||
|
char* str;
|
||||||
|
std::fstream f(filename, (std::fstream::in | std::fstream::binary));
|
||||||
|
|
||||||
|
if(f.is_open())
|
||||||
|
{
|
||||||
|
size_t fileSize;
|
||||||
|
f.seekg(0, std::fstream::end);
|
||||||
|
size = fileSize = (size_t)f.tellg();
|
||||||
|
f.seekg(0, std::fstream::beg);
|
||||||
|
str = new char[size+1];
|
||||||
|
if(!str)
|
||||||
|
{
|
||||||
|
f.close();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
f.read(str, fileSize);
|
||||||
|
f.close();
|
||||||
|
str[size] = '\0';
|
||||||
|
s = str;
|
||||||
|
delete[] str;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
cout<<"Error: failed to open file\n:"<<filename<<endl;
|
||||||
|
return FAILURE;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char* argv[])
|
||||||
|
{
|
||||||
|
|
||||||
|
|
||||||
|
// 1000 x 1000 grid
|
||||||
|
std::mt19937 rng(time(NULL));
|
||||||
|
std::uniform_int_distribution<int> rgen(0, 4); // 25% chance
|
||||||
|
|
||||||
|
char* grid = new char[1000 * 1000 * 2];
|
||||||
|
|
||||||
|
for (int i = 0; i < 1000 * 1000 * 2; i += 2) {
|
||||||
|
if (rgen(rng) == 1) {
|
||||||
|
grid[i] = 1;
|
||||||
|
grid[i + 1] = 1;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
grid[i] = 0;
|
||||||
|
grid[i + 1] = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*Step1: Getting platforms and choose an available one.*/
|
||||||
|
cl_uint numPlatforms; //the NO. of platforms
|
||||||
|
cl_platform_id platform = NULL; //the chosen platform
|
||||||
|
cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms);
|
||||||
|
if (status != CL_SUCCESS)
|
||||||
|
{
|
||||||
|
cout << "Error: Getting platforms!" << endl;
|
||||||
|
return FAILURE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*For clarity, choose the first available platform. */
|
||||||
|
if(numPlatforms > 0)
|
||||||
|
{
|
||||||
|
cl_platform_id* platforms = (cl_platform_id* )malloc(numPlatforms* sizeof(cl_platform_id));
|
||||||
|
status = clGetPlatformIDs(numPlatforms, platforms, NULL);
|
||||||
|
platform = platforms[0];
|
||||||
|
free(platforms);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*Step 2:Query the platform and choose the first GPU device if has one.Otherwise use the CPU as device.*/
|
||||||
|
cl_uint numDevices = 0;
|
||||||
|
cl_device_id *devices;
|
||||||
|
status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices);
|
||||||
|
if (numDevices == 0) //no GPU available.
|
||||||
|
{
|
||||||
|
cout << "No GPU device available." << endl;
|
||||||
|
cout << "Choose CPU as default device." << endl;
|
||||||
|
status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 0, NULL, &numDevices);
|
||||||
|
devices = (cl_device_id*)malloc(numDevices * sizeof(cl_device_id));
|
||||||
|
status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, numDevices, devices, NULL);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
devices = (cl_device_id*)malloc(numDevices * sizeof(cl_device_id));
|
||||||
|
status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numDevices, devices, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*Step 3: Create context.*/
|
||||||
|
cl_context context = clCreateContext(NULL,1, devices,NULL,NULL,NULL);
|
||||||
|
|
||||||
|
/*Step 4: Creating command queue associate with the context.*/
|
||||||
|
cl_command_queue commandQueue = clCreateCommandQueue(context, devices[0], 0, NULL);
|
||||||
|
|
||||||
|
/*Step 5: Create program object */
|
||||||
|
const char *filename = "HelloWorld_Kernel.cl";
|
||||||
|
string sourceStr;
|
||||||
|
status = convertToString(filename, sourceStr);
|
||||||
|
const char *source = sourceStr.c_str();
|
||||||
|
size_t sourceSize[] = {strlen(source)};
|
||||||
|
cl_program program = clCreateProgramWithSource(context, 1, &source, sourceSize, NULL);
|
||||||
|
|
||||||
|
/*Step 6: Build program. */
|
||||||
|
status=clBuildProgram(program, 1,devices,NULL,NULL,NULL);
|
||||||
|
|
||||||
|
/*Step 7: Initial input,output for the host and create memory objects for the kernel*/
|
||||||
|
const char* input = "GdkknVnqkc";
|
||||||
|
size_t strlength = strlen(input);
|
||||||
|
cout << "input string:" << endl;
|
||||||
|
cout << input << endl;
|
||||||
|
char *output = (char*) malloc(strlength + 1);
|
||||||
|
|
||||||
|
cl_mem inputBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, (strlength + 1) * sizeof(char),(void *) input, NULL);
|
||||||
|
cl_mem outputBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY , (strlength + 1) * sizeof(char), NULL, NULL);
|
||||||
|
|
||||||
|
/*Step 8: Create kernel object */
|
||||||
|
cl_kernel kernel = clCreateKernel(program,"helloworld", NULL);
|
||||||
|
|
||||||
|
/*Step 9: Sets Kernel arguments.*/
|
||||||
|
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&inputBuffer);
|
||||||
|
status = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&outputBuffer);
|
||||||
|
|
||||||
|
// ======================================= START SFML ==========================================================
|
||||||
|
|
||||||
|
|
||||||
|
// Spites for drawing, probably where the biggest slowdown is
|
||||||
|
sf::RectangleShape live_node;
|
||||||
|
live_node.setFillColor(sf::Color(145, 181, 207));
|
||||||
|
live_node.setSize(sf::Vector2f(WINDOW_X / Node::x_bound, WINDOW_Y / Node::y_bound));
|
||||||
|
|
||||||
|
// Init window, and loop data
|
||||||
|
sf::RenderWindow window(sf::VideoMode(WINDOW_X, WINDOW_Y), "Classic Games");
|
||||||
|
|
||||||
|
float step_size = 0.0005f;
|
||||||
|
double frame_time = 0.0, elapsed_time = 0.0, delta_time = 0.0, accumulator_time = 0.0, current_time = 0.0;
|
||||||
|
int frame_count = 0;
|
||||||
|
|
||||||
|
std::stack<std::thread> thread_stack;
|
||||||
|
|
||||||
|
while (window.isOpen()) {
|
||||||
|
|
||||||
|
sf::Event event;
|
||||||
|
while (window.pollEvent(event)) {
|
||||||
|
if (event.type == sf::Event::Closed)
|
||||||
|
window.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Time keeping
|
||||||
|
elapsed_time = elap_time();
|
||||||
|
delta_time = elapsed_time - current_time;
|
||||||
|
current_time = elapsed_time;
|
||||||
|
if (delta_time > 0.02f)
|
||||||
|
delta_time = 0.02f;
|
||||||
|
accumulator_time += delta_time;
|
||||||
|
|
||||||
|
while ((accumulator_time - step_size) >= step_size) {
|
||||||
|
accumulator_time -= step_size;
|
||||||
|
|
||||||
|
// Do nothing, FPS tied update()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Implicit dead node color
|
||||||
|
window.clear(sf::Color(49, 68, 72));
|
||||||
|
|
||||||
|
for (int i = 0; i < 12; i++) {
|
||||||
|
thread_stack.emplace(updateRange, &node_vec, (node_vec.size() / 12)* i, (node_vec.size() / 12)* (i + 1));
|
||||||
|
}
|
||||||
|
while (!thread_stack.empty()) {
|
||||||
|
thread_stack.top().join();
|
||||||
|
thread_stack.pop();
|
||||||
|
}
|
||||||
|
|
||||||
|
//for (int i = 0; i < node_vec.size(); i++) {
|
||||||
|
// node_vec.at(i).Update(&node_vec);
|
||||||
|
//}
|
||||||
|
|
||||||
|
for (int i = 0; i < node_vec.size(); i++) {
|
||||||
|
node_vec[i].ShiftState();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < node_vec.size(); i++) {
|
||||||
|
if (node_vec.at(i).CurrentState() == true) {
|
||||||
|
live_node.setPosition((i % Node::x_bound) * live_node.getGlobalBounds().width, (i / Node::x_bound) * live_node.getGlobalBounds().height);
|
||||||
|
window.draw(live_node);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
//dead_node.setPosition(i % Node::x_bound * dead_node.getGlobalBounds().width, i / Node::x_bound * dead_node.getGlobalBounds().height);
|
||||||
|
//window.draw(live_node);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
frame_count++;
|
||||||
|
window.display();
|
||||||
|
|
||||||
|
|
||||||
|
// ======================================= END SFML ==========================================================
|
||||||
|
|
||||||
|
/*Step 10: Running the kernel.*/
|
||||||
|
size_t global_work_size[1] = {strlength};
|
||||||
|
status = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
|
||||||
|
|
||||||
|
/*Step 11: Read the cout put back to host memory.*/
|
||||||
|
status = clEnqueueReadBuffer(commandQueue, outputBuffer, CL_TRUE, 0, strlength * sizeof(char), output, 0, NULL, NULL);
|
||||||
|
|
||||||
|
output[strlength] = '\0'; //Add the terminal character to the end of output.
|
||||||
|
cout << "\noutput string:" << endl;
|
||||||
|
cout << output << endl;
|
||||||
|
|
||||||
|
/*Step 12: Clean the resources.*/
|
||||||
|
status = clReleaseKernel(kernel); //Release kernel.
|
||||||
|
status = clReleaseProgram(program); //Release the program object.
|
||||||
|
status = clReleaseMemObject(inputBuffer); //Release mem object.
|
||||||
|
status = clReleaseMemObject(outputBuffer);
|
||||||
|
status = clReleaseCommandQueue(commandQueue); //Release Command queue.
|
||||||
|
status = clReleaseContext(context); //Release context.
|
||||||
|
|
||||||
|
if (output != NULL)
|
||||||
|
{
|
||||||
|
free(output);
|
||||||
|
output = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (devices != NULL)
|
||||||
|
{
|
||||||
|
free(devices);
|
||||||
|
devices = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout<<"Passed!\n";
|
||||||
|
return SUCCESS;
|
||||||
|
}
|
@ -0,0 +1,51 @@
|
|||||||
|
__kernel void helloworld(__global char* in, __global int num_workers, __global int grid_width, __global int grid_height)
|
||||||
|
{
|
||||||
|
// Caclulate the start and end range that this worker will be calculating
|
||||||
|
|
||||||
|
int data_length = grid_width * grid_height;
|
||||||
|
|
||||||
|
int start_range = (data_length / num_workers) * get_global_id(0) * 2; // * 2 = padding
|
||||||
|
int end_range = (data_length / num_workers) * (get_global_id(0) + 1) * 2;
|
||||||
|
|
||||||
|
// x, y + 1
|
||||||
|
|
||||||
|
int neighbors = 0;
|
||||||
|
|
||||||
|
for (int i = start_range; i < end_range; i += 2){
|
||||||
|
|
||||||
|
// add all 8 blocks to neghbors
|
||||||
|
|
||||||
|
// Top
|
||||||
|
neighbors += in[i - grid_width * 2];
|
||||||
|
|
||||||
|
// Top right
|
||||||
|
neightbors += in[i - grid_width * 2 + 2];
|
||||||
|
|
||||||
|
// Right
|
||||||
|
neighbors += in[i + 2];
|
||||||
|
|
||||||
|
// Bottom Right
|
||||||
|
neighbors += in[i + grid_width * 2 + 2];
|
||||||
|
|
||||||
|
// Bottom
|
||||||
|
neighbors += in[i + grid_width * 2];
|
||||||
|
|
||||||
|
// Bottom Left
|
||||||
|
neighbors += in[i + grid_width * 2 - 2];
|
||||||
|
|
||||||
|
// Left
|
||||||
|
neighbors += in[i - 2];
|
||||||
|
|
||||||
|
// Top left
|
||||||
|
neighbors += in[i - grid_width * 2 - 2];
|
||||||
|
|
||||||
|
// push living status to the padded second char
|
||||||
|
|
||||||
|
if (neighbors == 3 || (neighbors == 2 && in[i])){
|
||||||
|
in[i + 1] = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
else
|
||||||
|
in[i + 1] = 0;
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in new issue