moved gpu example to main

master
mitchellhansen 6 years ago
parent ff679e1cab
commit 9b266c8d3f

@@ -13,5 +13,8 @@ ncollide2d = "0.19.1"
nalgebra = "0.18.0"
image = "0.21.2"
rand = "0.6.5"
vulkano = "0.12.0"
vulkano-shaders = "0.12.0"
time = "0.1.38"

@@ -0,0 +1,12 @@
#version 450
layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
layout(set = 0, binding = 0) buffer Data {
uint data[];
} data;
void main() {
uint idx = gl_GlobalInvocationID.x;
data.data[idx] *= 12;
}
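
The shader indexes the buffer unguarded, which is safe here because the dispatch covers the buffer exactly; a minimal sketch of that workgroup arithmetic (hypothetical helper, not part of this commit):

fn workgroups_for(len: u32, local_size_x: u32) -> u32 {
    // Integer ceiling division: enough workgroups to cover every element.
    (len + local_size_x - 1) / local_size_x
}

fn main() {
    // 65536 elements at local_size_x = 64 gives the [1024, 1, 1] dispatch used below.
    assert_eq!(workgroups_for(65536, 64), 1024);
}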

@@ -1,160 +0,0 @@
// Copyright (c) 2017 The vulkano developers
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT
// license <LICENSE-MIT or http://opensource.org/licenses/MIT>,
// at your option. All files in the project carrying such
// notice may not be copied, modified, or distributed except
// according to those terms.
// This example demonstrates how to use the compute capabilities of Vulkan.
//
// While graphics cards have traditionally been used for graphical operations, over time they have
// been used more and more for general-purpose operations as well. This is called "General-Purpose
// GPU", or *GPGPU*, and it is what this example demonstrates.
use vulkano::buffer::{BufferUsage, CpuAccessibleBuffer};
use vulkano::command_buffer::AutoCommandBufferBuilder;
use vulkano::descriptor::descriptor_set::PersistentDescriptorSet;
use vulkano::device::{Device, DeviceExtensions};
use vulkano::instance::{Instance, InstanceExtensions, PhysicalDevice};
use vulkano::pipeline::ComputePipeline;
use vulkano::sync::GpuFuture;
use vulkano::sync;
use std::sync::Arc;
fn main() {
// As with other examples, the first step is to create an instance.
let instance = Instance::new(None, &InstanceExtensions::none(), None).unwrap();
// Choose which physical device to use.
let physical = PhysicalDevice::enumerate(&instance).next().unwrap();
// Choose the queue of the physical device which is going to run our compute operation.
//
// The Vulkan specs guarantee that a compliant implementation must provide at least one queue
// that supports compute operations.
let queue_family = physical.queue_families().find(|&q| q.supports_compute()).unwrap();
// Now initializing the device.
let (device, mut queues) = Device::new(physical,
physical.supported_features(),
&DeviceExtensions::none(),
[(queue_family, 0.5)].iter().cloned()).unwrap();
// Since we can request multiple queues, the `queues` variable is in fact an iterator. In this
// example we use only one queue, so we just retrieve the first and only element and then
// discard the iterator.
let queue = queues.next().unwrap();
println!("Device initialized");
// Now let's get to the actual example.
//
// What we are going to do is very basic: we are going to fill a buffer with 64k integers
// and ask the GPU to multiply each of them by 12.
//
// GPUs are very good at parallel computations (SIMD-like operations), and thus will do this
// much more quickly than a CPU would. While a CPU would typically multiply the values one by one
// or four by four, a GPU will process them in groups of 32 or 64.
//
// Note however that in a real-life situation for such a simple operation the cost of
// accessing memory usually outweighs the benefits of a faster calculation. Since both the CPU
// and the GPU will need to access data, there is no other choice but to transfer the data
// through the slow PCI express bus.
// We need to create the compute pipeline that describes our operation.
//
// If you are familiar with graphics pipelines, the principle is the same, except that compute
// pipelines are much simpler to create.
let pipeline = Arc::new({
mod cs {
vulkano_shaders::shader!{
ty: "compute",
src: "
#version 450
layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
layout(set = 0, binding = 0) buffer Data {
uint data[];
} data;
void main() {
uint idx = gl_GlobalInvocationID.x;
data.data[idx] *= 12;
}"
}
}
let shader = cs::Shader::load(device.clone()).unwrap();
ComputePipeline::new(device.clone(), &shader.main_entry_point(), &()).unwrap()
});
// We start by creating the buffer that will store the data.
let data_buffer = {
// Iterator that produces the data.
let data_iter = (0 .. 65536u32).map(|n| n);
// Builds the buffer and fills it with this iterator.
CpuAccessibleBuffer::from_iter(device.clone(), BufferUsage::all(), data_iter).unwrap()
};
// In order to let the shader access the buffer, we need to build a *descriptor set* that
// contains the buffer.
//
// The resources that we bind to the descriptor set must match the resources expected by the
// pipeline which we pass as the first parameter.
//
// If you want to run the pipeline on multiple different buffers, you need to create multiple
// descriptor sets that each contain the buffer you want to run the shader on.
let set = Arc::new(PersistentDescriptorSet::start(pipeline.clone(), 0)
.add_buffer(data_buffer.clone()).unwrap()
.build().unwrap()
);
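// For instance, running this pipeline over a second buffer would need its own
// set (sketch with a hypothetical `other_buffer`, shown commented out):
//
// let other_set = Arc::new(PersistentDescriptorSet::start(pipeline.clone(), 0)
//     .add_buffer(other_buffer.clone()).unwrap()
//     .build().unwrap()
// );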
// In order to execute our operation, we have to build a command buffer.
let command_buffer = AutoCommandBufferBuilder::primary_one_time_submit(device.clone(), queue.family()).unwrap()
// The command buffer only does one thing: execute the compute pipeline.
// This is called a *dispatch* operation.
//
// Note that we clone the pipeline and the set. Since they are both wrapped in an
// `Arc`, this only clones the `Arc` and not the whole pipeline or set (which aren't
// cloneable anyway). In this example we could avoid cloning them since this is the last
// time we use them, but in real code you would probably need to clone them.
.dispatch([1024, 1, 1], pipeline.clone(), set.clone(), ()).unwrap()
// Finish building the command buffer by calling `build`.
.build().unwrap();
// Let's execute this command buffer now.
// To do so, we chain it onto an empty future and submit the whole chain to the queue.
// (TODO: this is a bit clumsy, probably needs a shortcut.)
let future = sync::now(device.clone())
.then_execute(queue.clone(), command_buffer).unwrap()
// This line instructs the GPU to signal a *fence* once the command buffer has finished
// execution. A fence is a Vulkan object that allows the CPU to know when the GPU has
// reached a certain point.
// We need to signal a fence here because below we want to block the CPU until the GPU has
// reached that point in the execution.
.then_signal_fence_and_flush().unwrap();
// Blocks execution until the GPU has finished the operation. This method only exists on the
// future that corresponds to a signalled fence. In other words, this method wouldn't be
// available if we didn't call `.then_signal_fence_and_flush()` earlier.
// The `None` parameter is an optional timeout.
//
// Note however that dropping the `future` variable (with `drop(future)` for example) would
// block execution as well, and this would be the case even if we didn't call
// `.then_signal_fence_and_flush()`.
// Therefore the actual point of calling `.then_signal_fence_and_flush()` and `.wait()` is to
// make things more explicit. In the future, if the Rust language gets linear types, vulkano may
// be modified so that only fence-signalled futures can be destroyed like this.
future.wait(None).unwrap();
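// A timeout could be passed instead of `None`, for example (hypothetical value):
// future.wait(Some(std::time::Duration::from_secs(5))).unwrap();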
// Now that the GPU is done, the content of the buffer should have been modified. Let's
// check it out.
// The call to `read()` would return an error if the buffer was still in use by the GPU.
let data_buffer_content = data_buffer.read().unwrap();
for n in 0 .. 65536u32 {
assert_eq!(data_buffer_content[n as usize], n * 12);
}
}
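
A CPU reference of the same operation is handy for sanity-checking the GPU path; a minimal sketch (the `cpu_reference` helper is hypothetical, not part of this commit; `wrapping_mul` matches the wraparound of a GLSL `uint` multiply):

fn cpu_reference(data: &mut [u32]) {
    // Multiply every element by 12, wrapping on overflow like the shader does.
    for value in data.iter_mut() {
        *value = value.wrapping_mul(12);
    }
}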

@@ -19,6 +19,16 @@ use sfml::system::Vector2 as sfVec2;
use sfml::window::*;
use sfml::window::{Event, Key, Style};
use vulkano::buffer::{BufferUsage, CpuAccessibleBuffer};
use vulkano::command_buffer::AutoCommandBufferBuilder;
use vulkano::descriptor::descriptor_set::PersistentDescriptorSet;
use vulkano::device::{Device, DeviceExtensions};
use vulkano::instance::{Instance, InstanceExtensions, PhysicalDevice};
use vulkano::pipeline::ComputePipeline;
use vulkano::sync::GpuFuture;
use vulkano::sync;
use std::sync::Arc;
use crate::input::Input;
use crate::slider::Slider;
use crate::timer::Timer;
@@ -135,6 +145,47 @@ fn surrounding_pixels(x: u32, y: u32, img: &DynamicImage) -> Vec<image::Rgba<u8>
fn main() {
let instance = Instance::new(None, &InstanceExtensions::none(), None).unwrap();
let physical = PhysicalDevice::enumerate(&instance).next().unwrap();
let queue_family = physical.queue_families().find(|&q| q.supports_compute()).unwrap();
let (device, mut queues) = Device::new(physical,
physical.supported_features(),
&DeviceExtensions::none(),
[(queue_family, 0.5)].iter().cloned()).unwrap();
let queue = queues.next().unwrap();
println!("Device initialized");
let pipeline = Arc::new({
mod cs {
vulkano_shaders::shader!{
ty: "compute",
path: "resources/shaders/add.compute"
}
}
let shader = cs::Shader::load(device.clone()).unwrap();
ComputePipeline::new(device.clone(), &shader.main_entry_point(), &()).unwrap()
});
let data_buffer = {
let data_iter = (0 .. 65536u32).map(|n| n);
CpuAccessibleBuffer::from_iter(device.clone(), BufferUsage::all(), data_iter).unwrap()
};
let set = Arc::new(PersistentDescriptorSet::start(pipeline.clone(), 0)
.add_buffer(data_buffer.clone()).unwrap()
.build().unwrap()
);
let command_buffer = AutoCommandBufferBuilder::primary_one_time_submit(device.clone(), queue.family()).unwrap()
.dispatch([1024, 1, 1], pipeline.clone(), set.clone(), ()).unwrap()
.build().unwrap();
let future = sync::now(device.clone())
.then_execute(queue.clone(), command_buffer).unwrap()
.then_signal_fence_and_flush().unwrap();
future.wait(None).unwrap();
let data_buffer_content = data_buffer.read().unwrap();
for n in 0 .. 65536u32 {
assert_eq!(data_buffer_content[n as usize], n * 12);
}
let mut img = image::open("resources/images/funky-bird.jpg").unwrap();
let xy = img.dimensions();
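
A small aside on the readback loop inlined into main() above: the same check can be written over the buffer lock's iterator (a sketch; identical semantics):

for (n, value) in data_buffer_content.iter().enumerate() {
    assert_eq!(*value, n as u32 * 12);
}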
