From 9b266c8d3f8b4fda9c7614342c3d35c89ed03d01 Mon Sep 17 00:00:00 2001
From: mitchellhansen
Date: Wed, 12 Jun 2019 21:22:06 -0700
Subject: [PATCH] moved gpu example to main

---
 Cargo.toml                    |   3 +
 resources/shaders/add.compute |  12 +++
 src/basic-compute-shader.rs   | 160 ----------------------------------
 src/main.rs                   |  51 +++++++++++
 4 files changed, 66 insertions(+), 160 deletions(-)
 create mode 100644 resources/shaders/add.compute
 delete mode 100644 src/basic-compute-shader.rs

diff --git a/Cargo.toml b/Cargo.toml
index 5a29e1e3..09245e19 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,5 +13,8 @@
 ncollide2d = "0.19.1"
 nalgebra = "0.18.0"
 image = "0.21.2"
 rand = "0.6.5"
+vulkano = "0.12.0"
+vulkano-shaders = "0.12.0"
+time = "0.1.38"
 
diff --git a/resources/shaders/add.compute b/resources/shaders/add.compute
new file mode 100644
index 00000000..cf24845d
--- /dev/null
+++ b/resources/shaders/add.compute
@@ -0,0 +1,12 @@
+#version 450
+
+layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
+
+layout(set = 0, binding = 0) buffer Data {
+    uint data[];
+} data;
+
+void main() {
+    uint idx = gl_GlobalInvocationID.x;
+    data.data[idx] *= 12;
+}
\ No newline at end of file
diff --git a/src/basic-compute-shader.rs b/src/basic-compute-shader.rs
deleted file mode 100644
index 5aab015a..00000000
--- a/src/basic-compute-shader.rs
+++ /dev/null
@@ -1,160 +0,0 @@
-// Copyright (c) 2017 The vulkano developers
-// Licensed under the Apache License, Version 2.0
-// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
-// license <LICENSE-MIT or http://opensource.org/licenses/MIT>,
-// at your option. All files in the project carrying such
-// notice may not be copied, modified, or distributed except
-// according to those terms.
-
-// This example demonstrates how to use the compute capabilities of Vulkan.
-//
-// While graphics cards have traditionally been used for graphical operations, over time they have
-// been more and more used for general-purpose operations as well. This is called "General-Purpose
-// GPU", or *GPGPU*. This is what this example demonstrates.
-
-use vulkano::buffer::{BufferUsage, CpuAccessibleBuffer};
-use vulkano::command_buffer::AutoCommandBufferBuilder;
-use vulkano::descriptor::descriptor_set::PersistentDescriptorSet;
-use vulkano::device::{Device, DeviceExtensions};
-use vulkano::instance::{Instance, InstanceExtensions, PhysicalDevice};
-use vulkano::pipeline::ComputePipeline;
-use vulkano::sync::GpuFuture;
-use vulkano::sync;
-
-use std::sync::Arc;
-
-fn main() {
-    // As with other examples, the first step is to create an instance.
-    let instance = Instance::new(None, &InstanceExtensions::none(), None).unwrap();
-
-    // Choose which physical device to use.
-    let physical = PhysicalDevice::enumerate(&instance).next().unwrap();
-
-    // Choose the queue of the physical device which is going to run our compute operation.
-    //
-    // The Vulkan specs guarantee that a compliant implementation must provide at least one queue
-    // that supports compute operations.
-    let queue_family = physical.queue_families().find(|&q| q.supports_compute()).unwrap();
-
-    // Now initializing the device.
-    let (device, mut queues) = Device::new(physical,
-                                           physical.supported_features(),
-                                           &DeviceExtensions::none(),
-                                           [(queue_family, 0.5)].iter().cloned()).unwrap();
-
-    // Since we can request multiple queues, the `queues` variable is in fact an iterator. In this
-    // example we use only one queue, so we just retrieve the first and only element of the
-    // iterator and throw it away.
-    let queue = queues.next().unwrap();
-
-    println!("Device initialized");
-
-    // Now let's get to the actual example.
-    //
-    // What we are going to do is very basic: we are going to fill a buffer with 64k integers
-    // and ask the GPU to multiply each of them by 12.
-    //
-    // GPUs are very good at parallel computations (SIMD-like operations), and thus will do this
-    // much more quickly than a CPU would. While a CPU would typically multiply them one by one
-    // or four by four, a GPU will do it by groups of 32 or 64.
-    //
-    // Note however that in a real-life situation for such a simple operation the cost of
-    // accessing memory usually outweighs the benefits of a faster calculation. Since both the CPU
-    // and the GPU will need to access data, there is no other choice but to transfer the data
-    // through the slow PCI express bus.
-
-    // We need to create the compute pipeline that describes our operation.
-    //
-    // If you are familiar with graphics pipelines, the principle is the same except that compute
-    // pipelines are much simpler to create.
-    let pipeline = Arc::new({
-        mod cs {
-            vulkano_shaders::shader!{
-                ty: "compute",
-                src: "
-#version 450
-
-layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
-
-layout(set = 0, binding = 0) buffer Data {
-    uint data[];
-} data;
-
-void main() {
-    uint idx = gl_GlobalInvocationID.x;
-    data.data[idx] *= 12;
-}"
-            }
-        }
-        let shader = cs::Shader::load(device.clone()).unwrap();
-        ComputePipeline::new(device.clone(), &shader.main_entry_point(), &()).unwrap()
-    });
-
-    // We start by creating the buffer that will store the data.
-    let data_buffer = {
-        // Iterator that produces the data.
-        let data_iter = (0 .. 65536u32).map(|n| n);
-        // Builds the buffer and fills it with this iterator.
-        CpuAccessibleBuffer::from_iter(device.clone(), BufferUsage::all(), data_iter).unwrap()
-    };
-
-    // In order to let the shader access the buffer, we need to build a *descriptor set* that
-    // contains the buffer.
-    //
-    // The resources that we bind to the descriptor set must match the resources expected by the
-    // pipeline which we pass as the first parameter.
-    //
-    // If you want to run the pipeline on multiple different buffers, you need to create multiple
-    // descriptor sets that each contain the buffer you want to run the shader on.
-    let set = Arc::new(PersistentDescriptorSet::start(pipeline.clone(), 0)
-        .add_buffer(data_buffer.clone()).unwrap()
-        .build().unwrap()
-    );
-
-    // In order to execute our operation, we have to build a command buffer.
-    let command_buffer = AutoCommandBufferBuilder::primary_one_time_submit(device.clone(), queue.family()).unwrap()
-        // The command buffer only does one thing: execute the compute pipeline.
-        // This is called a *dispatch* operation.
-        //
-        // Note that we clone the pipeline and the set. Since they are both wrapped around an
-        // `Arc`, this only clones the `Arc` and not the whole pipeline or set (which aren't
-        // cloneable anyway). In this example we could avoid cloning them since this is the last
-        // time we use them, but in real code you would probably need to clone them.
-        .dispatch([1024, 1, 1], pipeline.clone(), set.clone(), ()).unwrap()
-        // Finish building the command buffer by calling `build`.
-        .build().unwrap();
-
-    // Let's execute this command buffer now.
-    // To do so, we TODO: this is a bit clumsy, probably needs a shortcut
-    let future = sync::now(device.clone())
-        .then_execute(queue.clone(), command_buffer).unwrap()
-
-        // This line instructs the GPU to signal a *fence* once the command buffer has finished
-        // execution. A fence is a Vulkan object that allows the CPU to know when the GPU has
-        // reached a certain point.
-        // We need to signal a fence here because below we want to block the CPU until the GPU has
-        // reached that point in the execution.
-        .then_signal_fence_and_flush().unwrap();
-
-    // Blocks execution until the GPU has finished the operation. This method only exists on the
-    // future that corresponds to a signalled fence. In other words, this method wouldn't be
-    // available if we didn't call `.then_signal_fence_and_flush()` earlier.
-    // The `None` parameter is an optional timeout.
-    //
-    // Note however that dropping the `future` variable (with `drop(future)` for example) would
-    // block execution as well, and this would be the case even if we didn't call
-    // `.then_signal_fence_and_flush()`.
-    // Therefore the actual point of calling `.then_signal_fence_and_flush()` and `.wait()` is to
-    // make things more explicit. In the future, if the Rust language gets linear types, vulkano may
-    // get modified so that only fence-signalled futures can get destroyed like this.
-    future.wait(None).unwrap();
-
-    // Now that the GPU is done, the content of the buffer should have been modified. Let's
-    // check it out.
-    // The call to `read()` would return an error if the buffer was still in use by the GPU.
-    let data_buffer_content = data_buffer.read().unwrap();
-    for n in 0 .. 65536u32 {
-        assert_eq!(data_buffer_content[n as usize], n * 12);
-    }
-}
diff --git a/src/main.rs b/src/main.rs
index d3157ce6..15576b65 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -19,6 +19,16 @@ use sfml::system::Vector2 as sfVec2;
 use sfml::window::*;
 use sfml::window::{Event, Key, Style};
 
+use vulkano::buffer::{BufferUsage, CpuAccessibleBuffer};
+use vulkano::command_buffer::AutoCommandBufferBuilder;
+use vulkano::descriptor::descriptor_set::PersistentDescriptorSet;
+use vulkano::device::{Device, DeviceExtensions};
+use vulkano::instance::{Instance, InstanceExtensions, PhysicalDevice};
+use vulkano::pipeline::ComputePipeline;
+use vulkano::sync::GpuFuture;
+use vulkano::sync;
+use std::sync::Arc;
+
 use crate::input::Input;
 use crate::slider::Slider;
 use crate::timer::Timer;
@@ -135,6 +145,47 @@ fn surrounding_pixels(x: u32, y: u32, img: &DynamicImage) -> Vec
 
 fn main() {
 
+    let instance = Instance::new(None, &InstanceExtensions::none(), None).unwrap();
+    let physical = PhysicalDevice::enumerate(&instance).next().unwrap();
+    let queue_family = physical.queue_families().find(|&q| q.supports_compute()).unwrap();
+    let (device, mut queues) = Device::new(physical,
+                                           physical.supported_features(),
+                                           &DeviceExtensions::none(),
+                                           [(queue_family, 0.5)].iter().cloned()).unwrap();
+    let queue = queues.next().unwrap();
+
+    println!("Device initialized");
+
+    let pipeline = Arc::new({
+        mod cs {
+            vulkano_shaders::shader!{
+                ty: "compute",
+                path: "resources/shaders/add.compute"
+            }
+        }
+        let shader = cs::Shader::load(device.clone()).unwrap();
+        ComputePipeline::new(device.clone(), &shader.main_entry_point(), &()).unwrap()
+    });
+    let data_buffer = {
+        let data_iter = (0 .. 65536u32).map(|n| n);
+        CpuAccessibleBuffer::from_iter(device.clone(), BufferUsage::all(), data_iter).unwrap()
+    };
+    let set = Arc::new(PersistentDescriptorSet::start(pipeline.clone(), 0)
+        .add_buffer(data_buffer.clone()).unwrap()
+        .build().unwrap()
+    );
+    let command_buffer = AutoCommandBufferBuilder::primary_one_time_submit(device.clone(), queue.family()).unwrap()
+        .dispatch([1024, 1, 1], pipeline.clone(), set.clone(), ()).unwrap()
+        .build().unwrap();
+    let future = sync::now(device.clone())
+        .then_execute(queue.clone(), command_buffer).unwrap()
+        .then_signal_fence_and_flush().unwrap();
+    future.wait(None).unwrap();
+    let data_buffer_content = data_buffer.read().unwrap();
+    for n in 0 .. 65536u32 {
+        assert_eq!(data_buffer_content[n as usize], n * 12);
+    }
+
     let mut img = image::open("resources/images/funky-bird.jpg").unwrap();
     let xy = img.dimensions();
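
A note on the numbers this patch relies on: add.compute declares local_size_x = 64, and main.rs dispatches [1024, 1, 1] workgroups, so the GPU runs 1024 x 64 = 65536 invocations, exactly one per element of the 65536-element buffer. The sketch below is not part of the patch; it is a minimal standalone check of that arithmetic, with illustrative names, useful if either constant is ever changed:

// Standalone sketch (not from the patch): verify the dispatch covers the buffer.
const LOCAL_SIZE_X: u32 = 64; // must match `local_size_x` in add.compute
const ELEMENTS: u32 = 65536;  // length of data_buffer in main.rs

/// Workgroups needed to cover `elements`, rounding up so a partial
/// group still covers the tail.
fn workgroup_count(elements: u32, local_size: u32) -> u32 {
    (elements + local_size - 1) / local_size
}

fn main() {
    let groups = workgroup_count(ELEMENTS, LOCAL_SIZE_X);
    assert_eq!(groups, 1024); // matches the `[1024, 1, 1]` dispatch in main.rs
    assert_eq!(groups * LOCAL_SIZE_X, ELEMENTS);
}

Because 65536 divides evenly by 64, the shader needs no bounds check; if the element count ever stopped being a multiple of 64, the rounded-up dispatch would spawn extra tail invocations, and the shader's main would also need a guard (something like `if (idx < n)`) to keep them from writing past the buffer.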
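
The shader itself is trivial, with each invocation multiplying one uint by 12, which is what makes the assertion in main.rs (`data_buffer_content[n] == n * 12`) a complete correctness check. A CPU oracle for the same computation, hypothetical and not part of the patch, can serve as a reference when modifying the shader:

// Hypothetical CPU reference (not from the patch): compute the expected
// buffer contents on the host, mirroring what the GPU produces.
fn cpu_reference(len: u32) -> Vec<u32> {
    (0..len).map(|n| n * 12).collect()
}

fn main() {
    let expected = cpu_reference(65536);
    // In the patched main.rs one would compare this element-wise against
    // `data_buffer.read()`; here we just sanity-check the oracle itself.
    assert_eq!(expected[3], 36);
}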