Using a workgroup size of 8x8 and dispatching correspondingly fewer workgroups improves performance 20x

master
mitchellhansen 5 years ago
parent 711e678969
commit 314fa3e4af

@ -1,6 +1,6 @@
#version 450 #version 450
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
layout(set = 0, binding = 0) buffer wData { layout(set = 0, binding = 0) buffer wData {
int buf[]; int buf[];
@ -35,44 +35,39 @@ void main() {
uint idx = get_idx(0,0); uint idx = get_idx(0,0);
// ivec4 p = separate(read_buffer.buf[get_idx(0 , 0)]); ivec4 p = separate(read_buffer.buf[get_idx(0 , 0)]);
// ivec4 p0 = separate(read_buffer.buf[get_idx(0 , 1)]); ivec4 p0 = separate(read_buffer.buf[get_idx(0 , 1)]);
// ivec4 p1 = separate(read_buffer.buf[get_idx(0 ,-1)]); ivec4 p1 = separate(read_buffer.buf[get_idx(0 ,-1)]);
// ivec4 p2 = separate(read_buffer.buf[get_idx(1 , 1)]); ivec4 p2 = separate(read_buffer.buf[get_idx(1 , 1)]);
// ivec4 p3 = separate(read_buffer.buf[get_idx(-1,-1)]); ivec4 p3 = separate(read_buffer.buf[get_idx(-1,-1)]);
// ivec4 p4 = separate(read_buffer.buf[get_idx(1 , 0)]); ivec4 p4 = separate(read_buffer.buf[get_idx(1 , 0)]);
// ivec4 p5 = separate(read_buffer.buf[get_idx(-1, 0)]); ivec4 p5 = separate(read_buffer.buf[get_idx(-1, 0)]);
// ivec4 p6 = separate(read_buffer.buf[get_idx(1 ,-1)]); ivec4 p6 = separate(read_buffer.buf[get_idx(1 ,-1)]);
// ivec4 p7 = separate(read_buffer.buf[get_idx(-1, 1)]); ivec4 p7 = separate(read_buffer.buf[get_idx(-1, 1)]);
//
// ivec3 d0 = abs(p0.xyz - p1.xyz); ivec3 d0 = abs(p0.xyz - p1.xyz);
// ivec3 d1 = abs(p2.xyz - p3.xyz); ivec3 d1 = abs(p2.xyz - p3.xyz);
// ivec3 d2 = abs(p4.xyz - p5.xyz); ivec3 d2 = abs(p4.xyz - p5.xyz);
// ivec3 d3 = abs(p6.xyz - p7.xyz); ivec3 d3 = abs(p6.xyz - p7.xyz);
//
// ivec3 m = max(max(max(d0, d1), d2), d3); ivec3 m = max(max(max(d0, d1), d2), d3);
//
// if ((m.x + m.y + m.z) > 200){ if ((m.x + m.y + m.z) > 200){
// p.x = 0; p.x = 0;
// p.y = 0; p.y = 0;
// p.z = 255; p.z = 255;
// } }
// else { else {
// //p.w = 125;
// //p.w = 125; }
// }
write_buffer.buf[idx] = (write_buffer.buf[idx] & (~0x000000FF) ) | (p.x);
// write_buffer.buf[idx] = (write_buffer.buf[idx] & (~0x000000FF) ) | (p.x); write_buffer.buf[idx] = (write_buffer.buf[idx] & (~0x0000FF00) ) | (p.y << 8);
// write_buffer.buf[idx] = (write_buffer.buf[idx] & (~0x0000FF00) ) | (p.y << 8); write_buffer.buf[idx] = (write_buffer.buf[idx] & (~0x00FF0000) ) | (p.z << 16);
// write_buffer.buf[idx] = (write_buffer.buf[idx] & (~0x00FF0000) ) | (p.z << 16); write_buffer.buf[idx] = (write_buffer.buf[idx] & (~0xFF000000) ) | (p.w << 24);
// write_buffer.buf[idx] = (write_buffer.buf[idx] & (~0xFF000000) ) | (p.w << 24);
} }
// Just gonna keep this around // Just gonna keep this around
// read_buffer.buf[idx] = (read_buffer.buf[idx] & (~0x000000FF) ) | (p.x); // read_buffer.buf[idx] = (read_buffer.buf[idx] & (~0x000000FF) ) | (p.x);
// read_buffer.buf[idx] = (read_buffer.buf[idx] & (~0x0000FF00) ) | (p.y << 8); // read_buffer.buf[idx] = (read_buffer.buf[idx] & (~0x0000FF00) ) | (p.y << 8);

@ -329,31 +329,10 @@ impl CanvasState {
*/ */
//TODO should probably use cpu accessible buffer instead of recreating immutes each frame
/*
CpuAccessibleBuffer::from_iter(
device.clone(),
BufferUsage::vertex_buffer(),
self.colored_drawables.iter().cloned(),
).unwrap().0;
*/
//if self.colored_vertex_buffer.len() == 0 {
self.colored_vertex_buffer.clear(); self.colored_vertex_buffer.clear();
{ {
let g = hprof::enter("Colored Vertex Buffer : From Data"); let g = hprof::enter("Colored Vertex Buffer");
self.colored_vertex_buffer.push(
ImmutableBuffer::from_data(self.colored_drawables.clone(),
BufferUsage::vertex_buffer(),
self.queue.clone()).unwrap().0
);
}
{
let g = hprof::enter("Colored Vertex Buffer : From Iter");
self.colored_vertex_buffer.push( self.colored_vertex_buffer.push(
ImmutableBuffer::from_iter( ImmutableBuffer::from_iter(
self.colored_drawables.iter().cloned(), self.colored_drawables.iter().cloned(),
@ -363,18 +342,10 @@ impl CanvasState {
); );
} }
self.colored_vertex_buffer.clear();
//println!("{:?}", self.colored_vertex_buffer.last().unwrap().size());
//self.colored_vertex_buffer.pop();
// }
// println!("{} {}", self.textured_vertex_buffer.len() , self.textured_drawables.len());
// if self.textured_vertex_buffer.len() != self.textured_drawables.len() {
self.textured_vertex_buffer.clear(); self.textured_vertex_buffer.clear();
for (k, v) in self.textured_drawables.drain() {
{ {
let g = hprof::enter("Textured Vertex Buffer : From Iter"); let g = hprof::enter("Textured Vertex Buffer");
for (k, v) in self.textured_drawables.drain() {
self.textured_vertex_buffer.insert( self.textured_vertex_buffer.insert(
k.clone(), k.clone(),
ImmutableBuffer::from_iter( ImmutableBuffer::from_iter(
@ -384,19 +355,7 @@ impl CanvasState {
).unwrap().0, ).unwrap().0,
); );
} }
// {
// let g = hprof::enter("Textured Vertex Buffer : From Data");
// self.textured_vertex_buffer.insert(
// k.clone(),
// ImmutableBuffer::from_data(v.first().unwrap().clone(),
// BufferUsage::vertex_buffer(),
// self.queue.clone()
// ).unwrap().0
// );
// }
//self.textured_vertex_buffer.pop();
} }
// }
} }
fn get_solid_color_descriptor_set(&self, kernel: Arc<CanvasShader>) -> Box<dyn DescriptorSet + Send + Sync> { fn get_solid_color_descriptor_set(&self, kernel: Arc<CanvasShader>) -> Box<dyn DescriptorSet + Send + Sync> {

@ -75,6 +75,7 @@ impl CompuState {
handle handle
} }
// TODO : THIS IS BROKEN
pub fn get_kernel_handle(&self, kernel_name: String) -> Option<Arc<CompuKernelHandle>> { pub fn get_kernel_handle(&self, kernel_name: String) -> Option<Arc<CompuKernelHandle>> {
for i in self.kernels.clone() { for i in self.kernels.clone() {
if i.get_name() == kernel_name { if i.get_name() == kernel_name {
@ -104,7 +105,7 @@ impl CompuState {
let size = buffer.get_size(); let size = buffer.get_size();
command_buffer = command_buffer command_buffer = command_buffer
.dispatch([size.0,size.1,1], p, d, ()).unwrap() .dispatch([size.0/8,size.1/8,1], p, d, ()).unwrap()
} }
// i = (Buffer, Image, Kernel) // i = (Buffer, Image, Kernel)

@ -165,13 +165,13 @@ fn main() {
} }
let mut compu_frame = CompuFrame::new(); let mut compu_frame = CompuFrame::new();
// compu_frame.add(compute_buffer.clone(), compute_kernel.clone()); compu_frame.add(compute_buffer.clone(), compute_kernel.clone());
// compu_frame.add_with_image_swap(compute_buffer.clone(), compute_kernel.clone(), &compu_sprite1); // compu_frame.add_with_image_swap(compute_buffer.clone(), compute_kernel.clone(), &compu_sprite1);
let mut canvas = CanvasFrame::new(); let mut canvas = CanvasFrame::new();
canvas.draw(&sprite); // canvas.draw(&sprite);
//canvas.draw(&sprite2); // canvas.draw(&sprite2);
//canvas.draw(&sprite3); // canvas.draw(&sprite3);
//canvas.draw(&compu_sprite1); //canvas.draw(&compu_sprite1);
{ {
let g = hprof::enter("Run"); let g = hprof::enter("Run");

@ -95,7 +95,7 @@ impl<'a> VkProcessor<'a> {
Swapchain::new(self.device.clone(), Swapchain::new(self.device.clone(),
surface.clone(), surface.clone(),
capabilities.min_image_count + 10, // number of attachment images capabilities.min_image_count, // number of attachment images
format, format,
initial_dimensions, initial_dimensions,
1, // Layers 1, // Layers
@ -103,7 +103,7 @@ impl<'a> VkProcessor<'a> {
&self.queue, &self.queue,
SurfaceTransform::Identity, SurfaceTransform::Identity,
alpha, alpha,
PresentMode::Mailbox, true, None).unwrap() PresentMode::Immediate, true, None).unwrap()
}; };
self.swapchain = Some(swapchain); self.swapchain = Some(swapchain);
@ -174,12 +174,10 @@ impl<'a> VkProcessor<'a> {
pub fn run(&mut self, pub fn run(&mut self,
surface: &'a Arc<Surface<Window>>, surface: &'a Arc<Surface<Window>>,
// mut frame_future: Box<dyn GpuFuture>,
canvas_frame: CanvasFrame, canvas_frame: CanvasFrame,
compute_frame: CompuFrame, compute_frame: CompuFrame,
) ) {
// -> Box<dyn GpuFuture> {
{
{ {
let g = hprof::enter("Waiting at queue"); let g = hprof::enter("Waiting at queue");
self.queue.wait(); self.queue.wait();
@ -189,9 +187,6 @@ impl<'a> VkProcessor<'a> {
let mut framebuffers = let mut framebuffers =
self.canvas.window_size_dependent_setup(&self.swapchain_images.clone().unwrap().clone()); self.canvas.window_size_dependent_setup(&self.swapchain_images.clone().unwrap().clone());
// The docs said to call this on each loop.
// frame_future.cleanup_finished();
// Whenever the window resizes we need to recreate everything dependent on the window size. // Whenever the window resizes we need to recreate everything dependent on the window size.
// In this example that includes the swapchain, the framebuffers and the dynamic state viewport. // In this example that includes the swapchain, the framebuffers and the dynamic state viewport.
if self.swapchain_recreate_needed { if self.swapchain_recreate_needed {
@ -201,33 +196,27 @@ impl<'a> VkProcessor<'a> {
self.swapchain_recreate_needed = false; self.swapchain_recreate_needed = false;
} }
// This function can block if no image is available. The parameter is an optional timeout // This function can block if no image is available. The parameter is an optional timeout
// after which the function call will return an error. // after which the function call will return an error.
let (image_num, acquire_future) = let (image_num, acquire_future) =
match vulkano::swapchain::acquire_next_image( match vulkano::swapchain::acquire_next_image(
self.swapchain.clone().unwrap().clone(), self.swapchain.clone().unwrap().clone(),
//Some(Duration::from_millis(3)),
None, None,
) { ) {
Ok(r) => r, Ok(r) => r,
Err(AcquireError::OutOfDate) => { Err(AcquireError::OutOfDate) => {
self.swapchain_recreate_needed = true; self.swapchain_recreate_needed = true;
//return Box::new(sync::now(self.device.clone())) as Box<_>;
return; return;
} }
Err(err) => panic!("{:?}", err) Err(err) => panic!("{:?}", err)
}; };
drop(g);
let g = hprof::enter("Joining the future");
// let future = frame_future.join(acquire_future);
drop(g); drop(g);
{ {
let g = hprof::enter("Canvas creates GPU buffers"); let g = hprof::enter("Canvas creates GPU buffers");
// take the canvas frame and create the vertex buffers // take the canvas frame and create the vertex buffers
// TODO: This performs gpu buffer creation. Shouldn't be in hotpath // TODO: This performs gpu buffer creation. Shouldn't be in hotpath??
self.canvas.draw(canvas_frame); self.canvas.draw(canvas_frame);
} }
@ -236,41 +225,43 @@ impl<'a> VkProcessor<'a> {
let g = hprof::enter("Push compute commands to command buffer"); let g = hprof::enter("Push compute commands to command buffer");
// Add the compute commands // Add the compute commands
// let mut command_buffer = self.compute_state.compute_commands(compute_frame, command_buffer, &self.canvas); let mut command_buffer = self.compute_state.compute_commands(compute_frame, command_buffer, &self.canvas);
drop(g); drop(g);
let g = hprof::enter("Push draw commands to command buffer"); let g = hprof::enter("Push draw commands to command buffer");
// Add the draw commands // Add the draw commands
// let mut command_buffer = self.canvas.draw_commands(command_buffer, framebuffers, image_num); let mut command_buffer = self.canvas.draw_commands(command_buffer, framebuffers, image_num);
drop(g);
// And build // And build
let command_buffer = command_buffer.build().unwrap(); let command_buffer = command_buffer.build().unwrap();
drop(g);
// Wait on the previous frame, then execute the command buffer and present the image // Wait on the previous frame, then execute the command buffer and present the image
{ {
let g = hprof::enter("Mussing with the frame future");
//let future = future //frame_future.join(acquire_future) let g = hprof::enter("Joining on the framebuffer");
let future = sync::now(self.device.clone()) let mut future = sync::now(self.device.clone())
.join(acquire_future);
drop(g);
let g = hprof::enter("Running the kernel and waiting on the future");
let future = future
.then_execute(self.queue.clone(), command_buffer).unwrap() .then_execute(self.queue.clone(), command_buffer).unwrap()
.then_swapchain_present(self.queue.clone(), self.swapchain.clone().unwrap().clone(), image_num) .then_swapchain_present(self.queue.clone(), self.swapchain.clone().unwrap().clone(), image_num)
.then_signal_fence_and_flush(); .then_signal_fence_and_flush();
future.unwrap().wait(None).unwrap();
match future {
// match future { Ok(future) => {
// Ok(future) => { future.wait(None).unwrap();
// (Box::new(future) as Box<_>) }
// } Err(FlushError::OutOfDate) => {
// Err(FlushError::OutOfDate) => { self.swapchain_recreate_needed = true;
// self.swapchain_recreate_needed = true; }
// (Box::new(sync::now(self.device.clone())) as Box<_>) Err(e) => {
// } println!("{:?}", e);
// Err(e) => { }
// println!("{:?}", e); }
// (Box::new(sync::now(self.device.clone())) as Box<_>)
// }
// }
} }
} }
} }

Loading…
Cancel
Save