queue: move num_queued_buffers out of buffer manager

This value is always updated from queue/dequeue and thus from the same
thread, so there is no need to put it behind a mutex.
diff --git a/src/device/queue.rs b/src/device/queue.rs
index 08d3179..d13282d 100644
--- a/src/device/queue.rs
+++ b/src/device/queue.rs
@@ -13,6 +13,7 @@
 use states::BufferState;
 use states::*;
 use std::os::unix::io::{AsRawFd, RawFd};
+use std::sync::atomic::Ordering;
 use std::sync::{Arc, Mutex, Weak};
 
 /// Contains the handles (pointers to user memory or DMABUFs) that are kept
@@ -214,6 +215,7 @@
             _d: std::marker::PhantomData,
             state: BuffersAllocated {
                 num_buffers,
+                num_queued_buffers: Default::default(),
                 buffers_state: Arc::new(Mutex::new(BuffersManager::new(num_buffers))),
                 buffer_features: querybuf,
             },
@@ -277,7 +279,7 @@
     /// Returns the number of buffers currently queued (i.e. being processed
     /// by the device).
     pub fn num_queued_buffers(&self) -> usize {
-        self.state.buffers_state.lock().unwrap().num_queued_buffers
+        self.state.num_queued_buffers.load(Ordering::SeqCst)
     }
 
     pub fn streamon(&self) -> Result<()> {
@@ -321,7 +323,9 @@
             })
             .collect();
 
-        buffers_state.num_queued_buffers -= canceled_buffers.len();
+        self.state
+            .num_queued_buffers
+            .fetch_sub(canceled_buffers.len(), Ordering::SeqCst);
         for buffer in &canceled_buffers {
             buffers_state.allocator.return_buffer(buffer.index as usize);
         }
@@ -406,7 +410,7 @@
         };
         let fuse = BufferStateFuse::new(Arc::downgrade(&self.state.buffers_state), id);
 
-        buffers_state.num_queued_buffers -= 1;
+        self.state.num_queued_buffers.fetch_sub(1, Ordering::SeqCst);
 
         Ok(DQBuffer::new(plane_handles, dqbuf, fuse))
     }
diff --git a/src/device/queue/qbuf.rs b/src/device/queue/qbuf.rs
index 911d426..7b086ca 100644
--- a/src/device/queue/qbuf.rs
+++ b/src/device/queue/qbuf.rs
@@ -6,6 +6,7 @@
 use crate::Error;
 use std::cmp::Ordering;
 use std::fmt::{self, Debug, Display};
+use std::sync::atomic;
 
 /// Error that can occur when queuing a buffer. It wraps a regular error and also
 /// returns the plane handles back to the user.
@@ -154,12 +155,13 @@
             &mut buffers_state.buffers_state[self.index],
             BufferState::Queued(plane_handles),
         );
-        // TODO this indicates that we should probably use treemaps for each buffer state
-        // (or bitmaps for simple state and a treemap for the queued one) instead of a global
-        // array?
-        buffers_state.num_queued_buffers += 1;
         drop(buffers_state);
 
+        self.queue
+            .state
+            .num_queued_buffers
+            .fetch_add(1, atomic::Ordering::SeqCst);
+
         Ok(())
     }
 }
diff --git a/src/device/queue/states.rs b/src/device/queue/states.rs
index c31c871..cfea702 100644
--- a/src/device/queue/states.rs
+++ b/src/device/queue/states.rs
@@ -3,6 +3,7 @@
 use crate::memory::Memory;
 use std::collections::VecDeque;
 
+use std::sync::atomic::AtomicUsize;
 use std::sync::{Arc, Mutex};
 
 /// Trait for the different states a queue can be in. This allows us to limit
@@ -64,7 +65,6 @@
 pub(super) struct BuffersManager<M: Memory> {
     pub(super) allocator: FifoBufferAllocator,
     pub(super) buffers_state: Vec<BufferState<M>>,
-    pub(super) num_queued_buffers: usize,
 }
 
 impl<M: Memory> BuffersManager<M> {
@@ -74,7 +74,6 @@
             buffers_state: std::iter::repeat_with(|| BufferState::Free)
                 .take(num_buffers)
                 .collect(),
-            num_queued_buffers: 0,
         }
     }
 }
@@ -83,6 +82,8 @@
 /// streamed on and off, and buffers can be queued and dequeued.
 pub struct BuffersAllocated<M: Memory> {
     pub(super) num_buffers: usize,
+    // TODO replace with Cell<usize>? Since we are not using this in a multi-threaded context.
+    pub(super) num_queued_buffers: AtomicUsize,
     pub(super) buffers_state: Arc<Mutex<BuffersManager<M>>>,
     pub(super) buffer_features: ioctl::QueryBuffer,
 }