|
|
|
@ -1,6 +1,6 @@
|
|
|
|
|
use vulkano::buffer::{BufferUsage, CpuAccessibleBuffer, DeviceLocalBuffer, ImmutableBuffer, BufferAccess};
|
|
|
|
|
use vulkano::command_buffer::AutoCommandBufferBuilder;
|
|
|
|
|
use vulkano::descriptor::descriptor_set::PersistentDescriptorSet;
|
|
|
|
|
use vulkano::descriptor::descriptor_set::{PersistentDescriptorSet, StdDescriptorPoolAlloc};
|
|
|
|
|
use vulkano::device::{Device, DeviceExtensions, QueuesIter, Queue};
|
|
|
|
|
use vulkano::instance::{Instance, InstanceExtensions, PhysicalDevice, QueueFamily};
|
|
|
|
|
use vulkano::pipeline::ComputePipeline;
|
|
|
|
@ -12,21 +12,30 @@ use std::ffi::CStr;
|
|
|
|
|
use std::path::PathBuf;
|
|
|
|
|
use shade_runner as sr;
|
|
|
|
|
use image::DynamicImage;
|
|
|
|
|
use image::GenericImageView;
|
|
|
|
|
use vulkano::descriptor::pipeline_layout::PipelineLayout;
|
|
|
|
|
use image::GenericImage;
|
|
|
|
|
use shade_runner::ComputeLayout;
|
|
|
|
|
use vulkano::descriptor::descriptor_set::PersistentDescriptorSetBuf;
|
|
|
|
|
|
|
|
|
|
pub struct VkProcessor<'a> {
|
|
|
|
|
instance: Arc<Instance>,
|
|
|
|
|
physical: PhysicalDevice<'a>,
|
|
|
|
|
queue_family: QueueFamily<'a>,
|
|
|
|
|
device: Arc<Device>,
|
|
|
|
|
queues: QueuesIter,
|
|
|
|
|
queue: Arc<Queue>,
|
|
|
|
|
img: Option<DynamicImage>,
|
|
|
|
|
image_buffer: Vec<u8>,
|
|
|
|
|
buffers: Vec::
|
|
|
|
|
}
|
|
|
|
|
pub instance: Arc<Instance>,
|
|
|
|
|
pub physical: PhysicalDevice<'a>,
|
|
|
|
|
pub queue_family: QueueFamily<'a>,
|
|
|
|
|
pub pipeline: Option<Arc<ComputePipeline<PipelineLayout<shade_runner::layouts::ComputeLayout>>>>,
|
|
|
|
|
pub device: Arc<Device>,
|
|
|
|
|
pub queues: QueuesIter,
|
|
|
|
|
pub queue: Arc<Queue>,
|
|
|
|
|
pub set: Option<Arc<PersistentDescriptorSet<std::sync::Arc<ComputePipeline<PipelineLayout<shade_runner::layouts::ComputeLayout>>>, ((((), PersistentDescriptorSetBuf<std::sync::Arc<vulkano::buffer::cpu_access::CpuAccessibleBuffer<[u8]>>>), PersistentDescriptorSetBuf<std::sync::Arc<vulkano::buffer::cpu_access::CpuAccessibleBuffer<[u8]>>>), PersistentDescriptorSetBuf<std::sync::Arc<vulkano::buffer::cpu_access::CpuAccessibleBuffer<[u32]>>>)>>>,
|
|
|
|
|
pub img: Option<DynamicImage>,
|
|
|
|
|
pub image_buffer: Vec<u8>,
|
|
|
|
|
pub img_buffers: Vec<Arc<CpuAccessibleBuffer<[u8]>>>,
|
|
|
|
|
pub settings_buffer: Option<Arc<CpuAccessibleBuffer<[u32]>>>,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl<'a> VkProcessor<'a> {
|
|
|
|
|
pub fn new() -> VkProcessor<'a> {
|
|
|
|
|
|
|
|
|
|
impl VkProcessor {
|
|
|
|
|
pub fn new() -> VkProcessor {
|
|
|
|
|
let instance = Instance::new(None, &InstanceExtensions::none(), None).unwrap();
|
|
|
|
|
let physical = PhysicalDevice::enumerate(&instance).next().unwrap();
|
|
|
|
|
let queue_family = physical.queue_families().find(|&q| q.supports_compute()).unwrap();
|
|
|
|
@ -34,17 +43,23 @@ impl VkProcessor {
|
|
|
|
|
physical.supported_features(),
|
|
|
|
|
&DeviceExtensions::none(),
|
|
|
|
|
[(queue_family, 0.5)].iter().cloned()).unwrap();
|
|
|
|
|
|
|
|
|
|
// Self referential struct problem
|
|
|
|
|
VkProcessor {
|
|
|
|
|
instance: instance,
|
|
|
|
|
physical: physical,
|
|
|
|
|
queue_family: queue_family,
|
|
|
|
|
instance: instance.clone(),
|
|
|
|
|
physical: physical.clone(),
|
|
|
|
|
queue_family: physical.queue_families().find(|&q| q.supports_compute()).unwrap(),
|
|
|
|
|
pipeline: Option::None,
|
|
|
|
|
device: device,
|
|
|
|
|
queues: queues,
|
|
|
|
|
queue: queues.next().unwrap(),
|
|
|
|
|
img: Option::None,
|
|
|
|
|
set: Option::None,
|
|
|
|
|
image_buffer: Vec::new(),
|
|
|
|
|
buffers: Vec::new(),
|
|
|
|
|
img_buffers: Vec::new(),
|
|
|
|
|
settings_buffer: Option::None,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn compile_kernel(&mut self) {
|
|
|
|
@ -68,20 +83,21 @@ impl VkProcessor {
|
|
|
|
|
).unwrap()
|
|
|
|
|
}
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn load_buffers(&mut self) {
|
|
|
|
|
|
|
|
|
|
self.img = Option::Some(image::open("resources/images/funky-bird.jpg").unwrap());
|
|
|
|
|
|
|
|
|
|
let xy = self.img.dimensions();
|
|
|
|
|
let xy = self.img.unwrap().dimensions();
|
|
|
|
|
let data_length = xy.0 * xy.1 * 4;
|
|
|
|
|
let pixel_count = self.img.raw_pixels().len();
|
|
|
|
|
let pixel_count = self.img.unwrap().raw_pixels().len();
|
|
|
|
|
println!("Pixel count {}", pixel_count);
|
|
|
|
|
|
|
|
|
|
if pixel_count != data_length as usize {
|
|
|
|
|
println!("Creating apha channel...");
|
|
|
|
|
for i in self.img.raw_pixels().iter() {
|
|
|
|
|
for i in self.img.unwrap().raw_pixels().iter() {
|
|
|
|
|
if (self.image_buffer.len() + 1) % 4 == 0 {
|
|
|
|
|
self.image_buffer.push(255);
|
|
|
|
|
}
|
|
|
|
@ -89,58 +105,63 @@ impl VkProcessor {
|
|
|
|
|
}
|
|
|
|
|
self.image_buffer.push(255);
|
|
|
|
|
} else {
|
|
|
|
|
self.image_buffer = self.img.raw_pixels();
|
|
|
|
|
self.image_buffer = self.img.unwrap().raw_pixels();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
println!("Buffer length {}", self.image_buffer.len());
|
|
|
|
|
println!("Size {:?}", xy);
|
|
|
|
|
|
|
|
|
|
println!("Allocating Buffers...");
|
|
|
|
|
{
|
|
|
|
|
|
|
|
|
|
// Pull out the image data and place it in a buffer for the kernel to write to and for us to read from
|
|
|
|
|
let write_buffer = {
|
|
|
|
|
let mut buff = image_buffer.iter();
|
|
|
|
|
let mut buff = self.image_buffer.iter();
|
|
|
|
|
let data_iter = (0..data_length).map(|n| *(buff.next().unwrap()));
|
|
|
|
|
CpuAccessibleBuffer::from_iter(device.clone(), BufferUsage::all(), data_iter).unwrap()
|
|
|
|
|
CpuAccessibleBuffer::from_iter(self.device.clone(), BufferUsage::all(), data_iter).unwrap()
|
|
|
|
|
};
|
|
|
|
|
self.img_buffers.push(write_buffer);
|
|
|
|
|
|
|
|
|
|
// Pull out the image data and place it in a buffer for the kernel to read from
|
|
|
|
|
let read_buffer = {
|
|
|
|
|
let mut buff = image_buffer.iter();
|
|
|
|
|
let mut buff = self.image_buffer.iter();
|
|
|
|
|
let data_iter = (0..data_length).map(|n| *(buff.next().unwrap()));
|
|
|
|
|
CpuAccessibleBuffer::from_iter(device.clone(), BufferUsage::all(), data_iter).unwrap()
|
|
|
|
|
CpuAccessibleBuffer::from_iter(self.device.clone(), BufferUsage::all(), data_iter).unwrap()
|
|
|
|
|
};
|
|
|
|
|
self.img_buffers.push(read_buffer);
|
|
|
|
|
|
|
|
|
|
// A buffer to hold many i32 values to use as settings
|
|
|
|
|
let settings_buffer = {
|
|
|
|
|
let vec = vec![xy.0, xy.1];
|
|
|
|
|
let mut buff = vec.iter();
|
|
|
|
|
let data_iter = (0..2).map(|n| *(buff.next().unwrap()));
|
|
|
|
|
CpuAccessibleBuffer::from_iter(device.clone(), BufferUsage::all(), data_iter).unwrap()
|
|
|
|
|
CpuAccessibleBuffer::from_iter(self.device.clone(), BufferUsage::all(), data_iter).unwrap()
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
self.settings_buffer = Some(settings_buffer);
|
|
|
|
|
|
|
|
|
|
println!("Done");
|
|
|
|
|
|
|
|
|
|
// Create the data descriptor set for our previously created shader pipeline
|
|
|
|
|
let mut set = PersistentDescriptorSet::start(pipeline.clone(), 0)
|
|
|
|
|
let mut set = PersistentDescriptorSet::start(self.pipeline.unwrap().clone(), 0)
|
|
|
|
|
.add_buffer(write_buffer.clone()).unwrap()
|
|
|
|
|
.add_buffer(read_buffer.clone()).unwrap()
|
|
|
|
|
.add_buffer(settings_buffer.clone()).unwrap();
|
|
|
|
|
|
|
|
|
|
let mut set = Arc::new(set.build().unwrap());
|
|
|
|
|
// self.set = Some(Arc::new(set.build().unwrap()));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn run_kernel(&mut self) {
|
|
|
|
|
|
|
|
|
|
println!("Running Kernel...");
|
|
|
|
|
let xy = self.img.unwrap().dimensions();
|
|
|
|
|
|
|
|
|
|
// The command buffer I think pretty much serves to define what runs where for how many times
|
|
|
|
|
let command_buffer = AutoCommandBufferBuilder::primary_one_time_submit(device.clone(), queue.family()).unwrap()
|
|
|
|
|
.dispatch([xy.0, xy.1, 1], pipeline.clone(), set.clone(), ()).unwrap()
|
|
|
|
|
let command_buffer = AutoCommandBufferBuilder::primary_one_time_submit(self.device.clone(), self.queue.family()).unwrap()
|
|
|
|
|
.dispatch([xy.0, xy.1, 1], self.pipeline.unwrap().clone(), self.set.unwrap().clone(), ()).unwrap()
|
|
|
|
|
.build().unwrap();
|
|
|
|
|
|
|
|
|
|
// Create a future for running the command buffer and then just fence it
|
|
|
|
|
let future = sync::now(device.clone())
|
|
|
|
|
.then_execute(queue.clone(), command_buffer).unwrap()
|
|
|
|
|
let future = sync::now(self.device.clone())
|
|
|
|
|
.then_execute(self.queue.clone(), command_buffer).unwrap()
|
|
|
|
|
.then_signal_fence_and_flush().unwrap();
|
|
|
|
|
|
|
|
|
|
// I think this is redundant and returns immediately
|
|
|
|
@ -148,10 +169,12 @@ impl VkProcessor {
|
|
|
|
|
println!("Done running kernel");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn read_image() -> Vec<u8> {
|
|
|
|
|
pub fn read_image(&self) -> Vec<u8> {
|
|
|
|
|
|
|
|
|
|
let xy = self.img.unwrap().dimensions();
|
|
|
|
|
|
|
|
|
|
// The buffer is sync'd so we can just read straight from the handle
|
|
|
|
|
let mut data_buffer_content = write_buffer.read().unwrap();
|
|
|
|
|
let mut data_buffer_content = self.img_buffers.get(0).unwrap().read().unwrap();
|
|
|
|
|
|
|
|
|
|
println!("Reading output");
|
|
|
|
|
|
|
|
|
@ -170,7 +193,7 @@ impl VkProcessor {
|
|
|
|
|
image_buffer.push(b);
|
|
|
|
|
image_buffer.push(a);
|
|
|
|
|
|
|
|
|
|
img.put_pixel(x, y, image::Rgba([r, g, b, a]))
|
|
|
|
|
self.img.unwrap().put_pixel(x, y, image::Rgba([r, g, b, a]))
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@ -179,7 +202,7 @@ impl VkProcessor {
|
|
|
|
|
|
|
|
|
|
pub fn save_image(&self) {
|
|
|
|
|
println!("Saving output");
|
|
|
|
|
img.save(format!("output/{}.png", SystemTime::now().duration_since(SystemTime::UNIX_EPOCH).unwrap().as_secs()));
|
|
|
|
|
self.img.unwrap().save(format!("output/{}.png", SystemTime::now().duration_since(SystemTime::UNIX_EPOCH).unwrap().as_secs()));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|