| // Copyright 2022 The ChromiumOS Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| use std::cell::RefCell; |
| use std::cell::RefMut; |
| use std::collections::HashSet; |
| use std::fmt::Debug; |
| use std::marker::PhantomData; |
| use std::rc::Rc; |
| |
| use anyhow::anyhow; |
| use anyhow::Context as AnyhowContext; |
| use byteorder::ByteOrder; |
| use byteorder::LittleEndian; |
| use libva::Config; |
| use libva::Context; |
| use libva::Display; |
| use libva::Image; |
| use libva::PictureEnd; |
| use libva::PictureNew; |
| use libva::PictureSync; |
| use libva::Surface; |
| use libva::VAConfigAttrib; |
| use libva::VAConfigAttribType; |
| use libva::VaError; |
| |
| use crate::decoder::DecodedHandle as DecodedHandleTrait; |
| use crate::decoder::DynHandle; |
| use crate::decoder::Error as VideoDecoderError; |
| use crate::decoder::MappableHandle; |
| use crate::decoder::Result as VideoDecoderResult; |
| use crate::decoder::StatelessBackendError; |
| use crate::decoder::StatelessBackendResult; |
| use crate::decoder::VideoDecoderBackend; |
| use crate::i4xx_copy; |
| use crate::nv12_copy; |
| use crate::utils::vaapi::surface_pool::SurfacePool; |
| use crate::y410_to_i410; |
| use crate::DecodedFormat; |
| use crate::Resolution; |
| |
| #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] |
| struct FormatMap { |
| pub rt_format: u32, |
| pub va_fourcc: u32, |
| pub decoded_format: DecodedFormat, |
| } |
| |
| /// Maps a given VA_RT_FORMAT to a compatible decoded format in an arbitrary |
| /// preferred order. |
| const FORMAT_MAP: [FormatMap; 10] = [ |
| FormatMap { |
| rt_format: libva::constants::VA_RT_FORMAT_YUV420, |
| va_fourcc: libva::constants::VA_FOURCC_NV12, |
| decoded_format: DecodedFormat::NV12, |
| }, |
| FormatMap { |
| rt_format: libva::constants::VA_RT_FORMAT_YUV420, |
| va_fourcc: libva::constants::VA_FOURCC_I420, |
| decoded_format: DecodedFormat::I420, |
| }, |
| FormatMap { |
| rt_format: libva::constants::VA_RT_FORMAT_YUV422, |
| va_fourcc: libva::constants::VA_FOURCC_422H, |
| decoded_format: DecodedFormat::I422, |
| }, |
| FormatMap { |
| rt_format: libva::constants::VA_RT_FORMAT_YUV444, |
| va_fourcc: libva::constants::VA_FOURCC_444P, |
| decoded_format: DecodedFormat::I444, |
| }, |
| FormatMap { |
| rt_format: libva::constants::VA_RT_FORMAT_YUV420_10, |
| va_fourcc: libva::constants::VA_FOURCC_P010, |
| decoded_format: DecodedFormat::I010, |
| }, |
| FormatMap { |
| rt_format: libva::constants::VA_RT_FORMAT_YUV420_12, |
| va_fourcc: libva::constants::VA_FOURCC_P012, |
| decoded_format: DecodedFormat::I012, |
| }, |
| FormatMap { |
| rt_format: libva::constants::VA_RT_FORMAT_YUV422_10, |
| va_fourcc: libva::constants::VA_FOURCC_Y210, |
| decoded_format: DecodedFormat::I210, |
| }, |
| FormatMap { |
| rt_format: libva::constants::VA_RT_FORMAT_YUV422_12, |
| va_fourcc: libva::constants::VA_FOURCC_Y212, |
| decoded_format: DecodedFormat::I212, |
| }, |
| FormatMap { |
| rt_format: libva::constants::VA_RT_FORMAT_YUV444_10, |
| va_fourcc: libva::constants::VA_FOURCC_Y410, |
| decoded_format: DecodedFormat::I410, |
| }, |
| FormatMap { |
| rt_format: libva::constants::VA_RT_FORMAT_YUV444_12, |
| va_fourcc: libva::constants::VA_FOURCC_Y412, |
| decoded_format: DecodedFormat::I412, |
| }, |
| ]; |
| |
| /// Returns a set of supported decoded formats given `rt_format` |
| fn supported_formats_for_rt_format( |
| display: &Display, |
| rt_format: u32, |
| profile: i32, |
| entrypoint: u32, |
| image_formats: &[libva::VAImageFormat], |
| ) -> anyhow::Result<HashSet<FormatMap>> { |
| let mut attrs = vec![VAConfigAttrib { |
| type_: VAConfigAttribType::VAConfigAttribRTFormat, |
| value: 0, |
| }]; |
| |
| display.get_config_attributes(profile, entrypoint, &mut attrs)?; |
| |
| // See whether this RT_FORMAT is supported by the given VAProfile and |
| // VAEntrypoint pair. |
| if attrs[0].value == libva::constants::VA_ATTRIB_NOT_SUPPORTED |
| || attrs[0].value & rt_format == 0 |
| { |
| return Err(anyhow!( |
| "rt_format {:?} not supported for profile {:?} and entrypoint {:?}", |
| rt_format, |
| profile, |
| entrypoint |
| )); |
| } |
| |
| let mut supported_formats = HashSet::new(); |
| |
| for format in FORMAT_MAP { |
| if format.rt_format == rt_format { |
| supported_formats.insert(format); |
| } |
| } |
| |
| // Only retain those that the hardware can actually map into. |
| supported_formats.retain(|&entry| { |
| image_formats |
| .iter() |
| .any(|fmt| fmt.fourcc == entry.va_fourcc) |
| }); |
| |
| Ok(supported_formats) |
| } |
| |
| impl TryInto<Surface> for PictureState { |
| type Error = anyhow::Error; |
| |
| fn try_into(self) -> Result<Surface, Self::Error> { |
| match self { |
| PictureState::Ready(picture) => picture |
| .take_surface() |
| .map_err(|_| anyhow!("picture is still referenced")), |
| PictureState::Pending(picture) => picture |
| .sync() |
| .map_err(|(e, _)| e)? |
| .take_surface() |
| .map_err(|_| anyhow!("picture is still referenced")), |
| PictureState::Invalid => unreachable!(), |
| } |
| } |
| } |
| |
| /// A decoded frame handle. |
| pub(crate) type DecodedHandle = Rc<RefCell<GenericBackendHandle>>; |
| |
| impl DecodedHandleTrait for DecodedHandle { |
| fn coded_resolution(&self) -> Resolution { |
| self.borrow().coded_resolution |
| } |
| |
| fn display_resolution(&self) -> Resolution { |
| self.borrow().display_resolution |
| } |
| |
| fn timestamp(&self) -> u64 { |
| self.borrow().timestamp() |
| } |
| |
| fn dyn_picture_mut(&self) -> RefMut<dyn DynHandle> { |
| self.borrow_mut() |
| } |
| |
| fn is_ready(&self) -> bool { |
| self.borrow().is_va_ready().unwrap_or(true) |
| } |
| |
| fn sync(&self) -> StatelessBackendResult<()> { |
| self.borrow_mut().sync().context("while syncing picture")?; |
| |
| Ok(()) |
| } |
| } |
| |
| mod surface_pool { |
| use std::collections::VecDeque; |
| |
| use anyhow::anyhow; |
| use libva::Surface; |
| |
| use crate::Resolution; |
| |
| /// A surface pool to reduce the number of costly Surface allocations. |
| /// |
| /// The pool only houses Surfaces that fits the pool's coded resolution. |
| /// Stale surfaces are dropped when either the pool resolution changes, or when |
| /// stale surfaces are retrieved. |
| /// |
| /// This means that this pool is suitable for inter-frame DRC, as the stale |
| /// surfaces will gracefully be dropped, which is arguably better than the |
| /// alternative of having more than one pool active at a time. |
| pub(crate) struct SurfacePool { |
| surfaces: VecDeque<Surface>, |
| coded_resolution: Resolution, |
| } |
| |
| impl SurfacePool { |
| /// Creates a new pool |
| pub(crate) fn new(surfaces: Vec<Surface>, resolution: Resolution) -> Self { |
| Self { |
| surfaces: VecDeque::from(surfaces), |
| coded_resolution: resolution, |
| } |
| } |
| |
| /// Retrieve the current coded resolution of the pool |
| pub(crate) fn coded_resolution(&self) -> Resolution { |
| self.coded_resolution |
| } |
| |
| /// Sets the coded resolution of the pool. Releases any stale surfaces. |
| pub(crate) fn set_coded_resolution(&mut self, resolution: Resolution) { |
| self.coded_resolution = resolution; |
| self.surfaces |
| .retain(|s| Resolution::from(s.size()).can_contain(self.coded_resolution)); |
| } |
| |
| /// Adds a new surface to the pool |
| pub(crate) fn add_surface( |
| &mut self, |
| surface: Surface, |
| ) -> Result<(), (Surface, anyhow::Error)> { |
| if Resolution::from(surface.size()).can_contain(self.coded_resolution) { |
| self.surfaces.push_back(surface); |
| Ok(()) |
| } else { |
| Err(( |
| surface, |
| anyhow!( |
| "Surface does not fit within the pool's coded resolution. Update the pool resolution first." |
| ), |
| )) |
| } |
| } |
| |
| /// Gets a free surface from the pool |
| pub(crate) fn get_surface(&mut self) -> Option<Surface> { |
| let surface = self.surfaces.pop_front(); |
| |
| // Make sure the invariant holds when debugging. Can save costly |
| // debugging time during future refactors, if any. |
| debug_assert!({ |
| match surface.as_ref() { |
| Some(s) => Resolution::from(s.size()).can_contain(self.coded_resolution), |
| None => true, |
| } |
| }); |
| |
| surface |
| } |
| |
| /// Returns new number of surfaces left. |
| pub(crate) fn num_surfaces_left(&self) -> usize { |
| self.surfaces.len() |
| } |
| } |
| } |
| |
| /// A trait for providing the basic information needed to setup libva for decoding. |
| pub(crate) trait StreamInfo { |
| /// Returns the VA profile of the stream. |
| fn va_profile(&self) -> anyhow::Result<i32>; |
| /// Returns the RT format of the stream. |
| fn rt_format(&self) -> anyhow::Result<u32>; |
| /// Returns the minimum number of surfaces required to decode the stream. |
| fn min_num_surfaces(&self) -> usize; |
| /// Returns the coded size of the surfaces required to decode the stream. |
| fn coded_size(&self) -> (u32, u32); |
| /// Returns the visible rectangle within the coded size for the stream. |
| fn visible_rect(&self) -> ((u32, u32), (u32, u32)); |
| } |
| |
| pub(crate) struct ParsedStreamMetadata { |
| /// A VAContext from which we can decode from. |
| pub(crate) context: Rc<Context>, |
| /// The VAConfig that created the context. It must kept here so that |
| /// it does not get dropped while it is in use. |
| #[allow(dead_code)] |
| config: Config, |
| /// A pool of surfaces. We reuse surfaces as they are expensive to allocate. |
| pub(crate) surface_pool: Rc<RefCell<SurfacePool>>, |
| /// The number of surfaces required to parse the stream. |
| min_num_surfaces: usize, |
| /// The decoder current coded resolution. |
| coded_resolution: Resolution, |
| /// The decoder current display resolution. |
| display_resolution: Resolution, |
| /// The image format we will use to map the surfaces. This is usually the |
| /// same as the surface's internal format, but occasionally we can try |
| /// mapping in a different format if requested and if the VA-API driver can |
| /// do it. |
| map_format: Rc<libva::VAImageFormat>, |
| /// The rt_format parsed from the stream. |
| rt_format: u32, |
| /// The profile parsed from the stream. |
| profile: i32, |
| } |
| |
| /// State of the input stream, which can be either unparsed (we don't know the stream properties |
| /// yet) or parsed (we know the stream properties and are ready to decode). |
| pub(crate) enum StreamMetadataState { |
| /// The metadata for the current stream has not yet been parsed. |
| Unparsed, |
| /// The metadata for the current stream has been parsed and a suitable |
| /// VAContext has been created to accomodate it. |
| Parsed(ParsedStreamMetadata), |
| } |
| |
| impl StreamMetadataState { |
| /// Returns a reference to the parsed metadata state or an error if we haven't reached that |
| /// state yet. |
| pub(crate) fn get_parsed(&self) -> anyhow::Result<&ParsedStreamMetadata> { |
| match self { |
| StreamMetadataState::Unparsed { .. } => Err(anyhow!("Stream metadata not parsed yet")), |
| StreamMetadataState::Parsed(parsed_metadata) => Ok(parsed_metadata), |
| } |
| } |
| |
| /// Returns a mutable reference to the parsed metadata state or an error if we haven't reached |
| /// that state yet. |
| pub(crate) fn get_parsed_mut(&mut self) -> anyhow::Result<&mut ParsedStreamMetadata> { |
| match self { |
| StreamMetadataState::Unparsed { .. } => Err(anyhow!("Stream metadata not parsed yet")), |
| StreamMetadataState::Parsed(parsed_metadata) => Ok(parsed_metadata), |
| } |
| } |
| |
| /// Initializes or reinitializes the codec state. |
| fn open<S: StreamInfo>( |
| display: &Rc<Display>, |
| hdr: S, |
| format_map: Option<&FormatMap>, |
| old_metadata_state: StreamMetadataState, |
| ) -> anyhow::Result<StreamMetadataState> { |
| let va_profile = hdr.va_profile()?; |
| let rt_format = hdr.rt_format()?; |
| let (frame_w, frame_h) = hdr.coded_size(); |
| |
| let format_map = if let Some(format_map) = format_map { |
| format_map |
| } else { |
| // Pick the first one that fits |
| FORMAT_MAP |
| .iter() |
| .find(|&map| map.rt_format == rt_format) |
| .ok_or(anyhow!("Unsupported format {}", rt_format))? |
| }; |
| |
| let map_format = display |
| .query_image_formats()? |
| .iter() |
| .find(|f| f.fourcc == format_map.va_fourcc) |
| .cloned() |
| .ok_or_else(|| { |
| anyhow!( |
| "cannot find corresponding VA format for fourcc {:?}", |
| format_map.va_fourcc |
| ) |
| })?; |
| |
| let min_num_surfaces = hdr.min_num_surfaces(); |
| |
| let coded_resolution = Resolution { |
| width: frame_w, |
| height: frame_h, |
| }; |
| |
| let visible_rect = hdr.visible_rect(); |
| |
| let display_resolution = Resolution { |
| width: visible_rect.1 .0 - visible_rect.0 .0, |
| height: visible_rect.1 .1 - visible_rect.0 .1, |
| }; |
| |
| let (create_new_surfaces, surface_pool) = match old_metadata_state { |
| StreamMetadataState::Unparsed => ( |
| true, |
| Rc::new(RefCell::new(SurfacePool::new(vec![], coded_resolution))), |
| ), |
| StreamMetadataState::Parsed(ParsedStreamMetadata { |
| min_num_surfaces: old_min_num_surfaces, |
| ref surface_pool, |
| .. |
| }) => { |
| let create_new_surfaces = min_num_surfaces > old_min_num_surfaces |
| || !surface_pool |
| .borrow() |
| .coded_resolution() |
| .can_contain(coded_resolution); |
| |
| (create_new_surfaces, Rc::clone(surface_pool)) |
| } |
| }; |
| |
| if !surface_pool |
| .borrow() |
| .coded_resolution() |
| .can_contain(coded_resolution) |
| { |
| // Purge the old surfaces to receive the new ones below. This |
| // ensures that the pool is always set to the largest resolution in |
| // the stream, so that no new allocations are needed when we come |
| // across a smaller resolution. In particular, for |
| // video-conferencing applications, which are subject to bandwidth |
| // fluctuations, this can be very advantageous as it avoid |
| // reallocating all the time. |
| surface_pool |
| .borrow_mut() |
| .set_coded_resolution(coded_resolution); |
| } |
| |
| let (config, context) = match old_metadata_state { |
| // Reuse current context. |
| StreamMetadataState::Parsed(old_state) |
| if old_state.rt_format == rt_format && old_state.profile == va_profile => |
| { |
| (old_state.config, old_state.context) |
| } |
| // Create new context. |
| _ => { |
| let config = display.create_config( |
| vec![libva::VAConfigAttrib { |
| type_: libva::VAConfigAttribType::VAConfigAttribRTFormat, |
| value: rt_format, |
| }], |
| va_profile, |
| libva::VAEntrypoint::VAEntrypointVLD, |
| )?; |
| |
| let context = display.create_context(&config, frame_w, frame_h, None, true)?; |
| |
| (config, context) |
| } |
| }; |
| |
| if create_new_surfaces { |
| let surfaces = display.create_surfaces( |
| rt_format, |
| // Let the hardware decide the best internal format - we will get the desired fourcc |
| // when creating the image. |
| None, |
| frame_w, |
| frame_h, |
| Some(libva::UsageHint::USAGE_HINT_DECODER), |
| min_num_surfaces, |
| )?; |
| |
| for surface in surfaces { |
| surface_pool |
| .borrow_mut() |
| .add_surface(surface) |
| .map_err(|e| e.1)?; |
| } |
| } |
| |
| Ok(StreamMetadataState::Parsed(ParsedStreamMetadata { |
| context, |
| config, |
| surface_pool, |
| min_num_surfaces, |
| coded_resolution, |
| display_resolution, |
| map_format: Rc::new(map_format), |
| rt_format, |
| profile: va_profile, |
| })) |
| } |
| } |
| |
| /// VA-API backend handle. |
| /// |
| /// This includes the VA picture which can be pending rendering or complete, as well as useful |
| /// meta-information. |
| pub struct GenericBackendHandle { |
| state: PictureState, |
| /// The decoder resolution when this frame was processed. Not all codecs |
| /// send resolution data in every frame header. |
| coded_resolution: Resolution, |
| /// Actual resolution of the visible rectangle in the decoded buffer. |
| display_resolution: Resolution, |
| /// Image format for this surface, taken from the pool it originates from. |
| map_format: Rc<libva::VAImageFormat>, |
| /// A handle to the surface pool from which the backing surface originates. |
| surface_pool: Rc<RefCell<SurfacePool>>, |
| } |
| |
| impl Drop for GenericBackendHandle { |
| fn drop(&mut self) { |
| // Take ownership of the internal state. |
| let state = std::mem::replace(&mut self.state, PictureState::Invalid); |
| if let Ok(surface) = state.try_into() { |
| // It is OK if the pool rejects the surface. It means that the |
| // surface is stale and will be gracefully dropped. |
| if let Err((surface, _)) = self.surface_pool.borrow_mut().add_surface(surface) { |
| log::debug!( |
| "Dropping stale surface: {}, ({:?})", |
| surface.id(), |
| surface.size() |
| ) |
| } |
| } |
| } |
| } |
| |
| impl GenericBackendHandle { |
| /// Creates a new pending handle on `surface_id`. |
| fn new( |
| picture: libva::Picture<PictureNew>, |
| metadata: &ParsedStreamMetadata, |
| ) -> anyhow::Result<Self> { |
| let picture = picture.begin()?.render()?.end()?; |
| Ok(Self { |
| state: PictureState::Pending(picture), |
| coded_resolution: metadata.coded_resolution, |
| display_resolution: metadata.display_resolution, |
| map_format: Rc::clone(&metadata.map_format), |
| surface_pool: Rc::clone(&metadata.surface_pool), |
| }) |
| } |
| |
| fn sync(&mut self) -> Result<(), VaError> { |
| let res; |
| |
| (self.state, res) = match std::mem::replace(&mut self.state, PictureState::Invalid) { |
| state @ PictureState::Ready(_) => (state, Ok(())), |
| PictureState::Pending(picture) => match picture.sync() { |
| Ok(picture) => (PictureState::Ready(picture), Ok(())), |
| Err((e, picture)) => (PictureState::Pending(picture), Err(e)), |
| }, |
| PictureState::Invalid => unreachable!(), |
| }; |
| |
| res |
| } |
| |
| /// Returns a mapped VAImage. this maps the VASurface onto our address space. |
| /// This can be used in place of "DynMappableHandle::map()" if the client |
| /// wants to access the backend mapping directly for any reason. |
| /// |
| /// Note that DynMappableHandle is downcastable. |
| fn image(&mut self) -> anyhow::Result<Image> { |
| // Image can only be retrieved in the `Ready` state. |
| self.sync()?; |
| |
| match &self.state { |
| PictureState::Ready(picture) => { |
| // Map the VASurface onto our address space. |
| let image = picture.create_image( |
| *self.map_format, |
| self.display_resolution.width, |
| self.display_resolution.height, |
| )?; |
| |
| Ok(image) |
| } |
| // Either we are in `Ready` state or `sync` failed and we returned. |
| PictureState::Pending(_) | PictureState::Invalid => unreachable!(), |
| } |
| } |
| |
| /// Returns the picture of this handle. |
| pub(crate) fn picture(&self) -> Option<&libva::Picture<PictureSync>> { |
| match &self.state { |
| PictureState::Ready(picture) => Some(picture), |
| PictureState::Pending(_) => None, |
| PictureState::Invalid => unreachable!(), |
| } |
| } |
| |
| /// Returns the timestamp of this handle. |
| fn timestamp(&self) -> u64 { |
| match &self.state { |
| PictureState::Ready(picture) => picture.timestamp(), |
| PictureState::Pending(picture) => picture.timestamp(), |
| PictureState::Invalid => unreachable!(), |
| } |
| } |
| |
| /// Returns the id of the VA surface backing this handle. |
| pub(crate) fn surface_id(&self) -> libva::VASurfaceID { |
| match &self.state { |
| PictureState::Ready(picture) => picture.surface_id(), |
| PictureState::Pending(picture) => picture.surface_id(), |
| PictureState::Invalid => unreachable!(), |
| } |
| } |
| |
| fn is_va_ready(&self) -> Result<bool, VaError> { |
| match &self.state { |
| PictureState::Ready(_) => Ok(true), |
| PictureState::Pending(picture) => picture |
| .query_status() |
| .map(|s| s == libva::VASurfaceStatus::VASurfaceReady), |
| PictureState::Invalid => unreachable!(), |
| } |
| } |
| } |
| |
| impl DynHandle for GenericBackendHandle { |
| fn dyn_mappable_handle_mut<'a>(&'a mut self) -> Box<dyn MappableHandle + 'a> { |
| Box::new(self.image().unwrap()) |
| } |
| } |
| |
| /// Rendering state of a VA picture. |
| enum PictureState { |
| Ready(libva::Picture<PictureSync>), |
| Pending(libva::Picture<PictureEnd>), |
| // Only set in the destructor when we take ownership of the VA picture. |
| Invalid, |
| } |
| |
| impl<'a> MappableHandle for Image<'a> { |
| fn read(&mut self, buffer: &mut [u8]) -> VideoDecoderResult<()> { |
| let image_size = self.image_size(); |
| let image_inner = self.image(); |
| |
| let width = image_inner.width as usize; |
| let height = image_inner.height as usize; |
| |
| if buffer.len() != image_size { |
| return Err(VideoDecoderError::StatelessBackendError( |
| StatelessBackendError::Other(anyhow!( |
| "buffer size is {} while image size is {}", |
| buffer.len(), |
| image_size |
| )), |
| )); |
| } |
| |
| let pitches = image_inner.pitches.map(|x| x as usize); |
| let offsets = image_inner.offsets.map(|x| x as usize); |
| |
| match image_inner.format.fourcc { |
| libva::constants::VA_FOURCC_NV12 => { |
| nv12_copy(self.as_ref(), buffer, width, height, pitches, offsets); |
| } |
| libva::constants::VA_FOURCC_I420 => { |
| i4xx_copy( |
| self.as_ref(), |
| buffer, |
| width, |
| height, |
| pitches, |
| offsets, |
| (true, true), |
| ); |
| } |
| libva::constants::VA_FOURCC_422H => { |
| i4xx_copy( |
| self.as_ref(), |
| buffer, |
| width, |
| height, |
| pitches, |
| offsets, |
| (true, false), |
| ); |
| } |
| libva::constants::VA_FOURCC_444P => { |
| i4xx_copy( |
| self.as_ref(), |
| buffer, |
| width, |
| height, |
| pitches, |
| offsets, |
| (false, false), |
| ); |
| } |
| libva::constants::VA_FOURCC_P010 => { |
| p01x_to_i01x(self.as_ref(), buffer, 10, width, height, pitches, offsets); |
| } |
| libva::constants::VA_FOURCC_P012 => { |
| p01x_to_i01x(self.as_ref(), buffer, 12, width, height, pitches, offsets); |
| } |
| libva::constants::VA_FOURCC_Y210 => { |
| y21x_to_i21x(self.as_ref(), buffer, 10, width, height, pitches, offsets); |
| } |
| libva::constants::VA_FOURCC_Y212 => { |
| y21x_to_i21x(self.as_ref(), buffer, 12, width, height, pitches, offsets); |
| } |
| libva::constants::VA_FOURCC_Y410 => { |
| y410_to_i410(self.as_ref(), buffer, width, height, pitches, offsets); |
| } |
| libva::constants::VA_FOURCC_Y412 => { |
| y412_to_i412(self.as_ref(), buffer, width, height, pitches, offsets); |
| } |
| _ => { |
| return Err(crate::decoder::Error::StatelessBackendError( |
| StatelessBackendError::UnsupportedFormat, |
| )) |
| } |
| } |
| |
| Ok(()) |
| } |
| |
| fn image_size(&mut self) -> usize { |
| let image = self.image(); |
| |
| crate::decoded_frame_size( |
| (&image.format).try_into().unwrap(), |
| image.width as usize, |
| image.height as usize, |
| ) |
| } |
| } |
| |
| impl TryFrom<&libva::VAImageFormat> for DecodedFormat { |
| type Error = anyhow::Error; |
| |
| fn try_from(value: &libva::VAImageFormat) -> Result<Self, Self::Error> { |
| match value.fourcc { |
| libva::constants::VA_FOURCC_I420 => Ok(DecodedFormat::I420), |
| libva::constants::VA_FOURCC_NV12 => Ok(DecodedFormat::NV12), |
| libva::constants::VA_FOURCC_P010 => Ok(DecodedFormat::I010), |
| libva::constants::VA_FOURCC_P012 => Ok(DecodedFormat::I012), |
| libva::constants::VA_FOURCC_Y210 => Ok(DecodedFormat::I210), |
| libva::constants::VA_FOURCC_Y212 => Ok(DecodedFormat::I212), |
| libva::constants::VA_FOURCC_Y410 => Ok(DecodedFormat::I410), |
| libva::constants::VA_FOURCC_Y412 => Ok(DecodedFormat::I412), |
| _ => Err(anyhow!("Unsupported format")), |
| } |
| } |
| } |
| |
| pub(crate) struct VaapiBackend<StreamData> |
| where |
| for<'a> &'a StreamData: StreamInfo, |
| { |
| /// VA display in use for this stream. |
| display: Rc<Display>, |
| /// The metadata state. Updated whenever the decoder reads new data from the stream. |
| pub(crate) metadata_state: StreamMetadataState, |
| /// Make sure the backend is typed by stream information provider. |
| _stream_data: PhantomData<StreamData>, |
| } |
| |
| impl<StreamData> VaapiBackend<StreamData> |
| where |
| StreamData: Clone, |
| for<'a> &'a StreamData: StreamInfo, |
| { |
| pub(crate) fn new(display: Rc<libva::Display>) -> Self { |
| Self { |
| display, |
| metadata_state: StreamMetadataState::Unparsed, |
| _stream_data: PhantomData, |
| } |
| } |
| |
| pub(crate) fn new_sequence( |
| &mut self, |
| stream_params: &StreamData, |
| ) -> StatelessBackendResult<()> { |
| let old_metadata_state = |
| std::mem::replace(&mut self.metadata_state, StreamMetadataState::Unparsed); |
| |
| self.metadata_state = |
| StreamMetadataState::open(&self.display, stream_params, None, old_metadata_state)?; |
| |
| Ok(()) |
| } |
| |
| pub(crate) fn process_picture( |
| &mut self, |
| picture: libva::Picture<PictureNew>, |
| ) -> StatelessBackendResult<<Self as VideoDecoderBackend<StreamData>>::Handle> { |
| let metadata = self.metadata_state.get_parsed()?; |
| |
| Ok(Rc::new(RefCell::new(GenericBackendHandle::new( |
| picture, metadata, |
| )?))) |
| } |
| |
| /// Gets a set of supported formats for the particular stream being |
| /// processed. This requires that some buffers be processed before this call |
| /// is made. Only formats that are compatible with the current color space, |
| /// bit depth, and chroma format are returned such that no conversion is |
| /// needed. |
| fn supported_formats_for_stream(&self) -> anyhow::Result<HashSet<DecodedFormat>> { |
| let metadata = self.metadata_state.get_parsed()?; |
| let image_formats = self.display.query_image_formats()?; |
| |
| let formats = supported_formats_for_rt_format( |
| &self.display, |
| metadata.rt_format, |
| metadata.profile, |
| libva::VAEntrypoint::VAEntrypointVLD, |
| &image_formats, |
| )?; |
| |
| Ok(formats.into_iter().map(|f| f.decoded_format).collect()) |
| } |
| } |
| |
| impl<StreamData> VideoDecoderBackend<StreamData> for VaapiBackend<StreamData> |
| where |
| StreamData: Clone, |
| for<'a> &'a StreamData: StreamInfo, |
| { |
| type Handle = DecodedHandle; |
| |
| fn coded_resolution(&self) -> Option<Resolution> { |
| self.metadata_state |
| .get_parsed() |
| .map(|m| m.coded_resolution) |
| .ok() |
| } |
| |
| fn display_resolution(&self) -> Option<Resolution> { |
| self.metadata_state |
| .get_parsed() |
| .map(|m| m.display_resolution) |
| .ok() |
| } |
| |
| fn num_resources_total(&self) -> usize { |
| self.metadata_state |
| .get_parsed() |
| .map(|m| m.min_num_surfaces) |
| .unwrap_or(0) |
| } |
| |
| fn num_resources_left(&self) -> usize { |
| self.metadata_state |
| .get_parsed() |
| .map(|m| m.surface_pool.borrow().num_surfaces_left()) |
| .unwrap_or(0) |
| } |
| |
| fn format(&self) -> Option<crate::DecodedFormat> { |
| let map_format = self |
| .metadata_state |
| .get_parsed() |
| .map(|m| &m.map_format) |
| .ok()?; |
| DecodedFormat::try_from(map_format.as_ref()).ok() |
| } |
| |
| fn try_format( |
| &mut self, |
| format_info: &StreamData, |
| format: crate::DecodedFormat, |
| ) -> VideoDecoderResult<()> { |
| let supported_formats_for_stream = self.supported_formats_for_stream()?; |
| |
| if supported_formats_for_stream.contains(&format) { |
| let map_format = FORMAT_MAP |
| .iter() |
| .find(|&map| map.decoded_format == format) |
| .ok_or_else(|| { |
| anyhow!( |
| "cannot find corresponding VA format for decoded format {:?}", |
| format |
| ) |
| })?; |
| |
| let old_metadata_state = |
| std::mem::replace(&mut self.metadata_state, StreamMetadataState::Unparsed); |
| |
| // TODO: since we have established that it's best to let the VA |
| // driver choose the surface's internal (tiled) format, and map to |
| // the fourcc we want on-the-fly, this call to open() becomes |
| // redundant. |
| // |
| // Let's fix it at a later commit, because it involves other, |
| // non-related, cleanups. |
| // |
| // This does not apply to other (future) backends, like V4L2, which |
| // need to reallocate on format change. |
| self.metadata_state = StreamMetadataState::open( |
| &self.display, |
| format_info, |
| Some(map_format), |
| old_metadata_state, |
| )?; |
| |
| Ok(()) |
| } else { |
| Err(VideoDecoderError::StatelessBackendError( |
| StatelessBackendError::NegotiationFailed(anyhow!( |
| "Format {:?} is unsupported.", |
| format |
| )), |
| )) |
| } |
| } |
| } |
| |
/// Converts the biplanar `src` (Y plane followed by an interleaved UV plane)
/// into the triplanar `dst`, dropping all padding along the way.
///
/// `useful_pixels` is the number of meaningful bits in each 16-bit sample,
/// e.g. `10` for `P010` or `12` for `P012`.
///
/// This function is VAAPI-specific because of the unusual way the source
/// samples are laid out: VAAPI stores the meaningful bits in the MSBs, whereas
/// software generally expects them in the LSBs.
fn p01x_to_i01x(
    src: &[u8],
    dst: &mut [u8],
    useful_pixels: usize,
    width: usize,
    height: usize,
    strides: [usize; 3],
    offsets: [usize; 3],
) {
    /// Reads a little-endian sample from `from`, shifts its payload down to
    /// the LSBs and writes it, little-endian, to `to`.
    fn shift_sample(from: &[u8], to: &mut [u8], shift: usize) {
        let sample = u16::from_le_bytes([from[0], from[1]]);
        to[..2].copy_from_slice(&(sample >> shift).to_le_bytes());
    }

    let shift = 16 - useful_pixels;

    // Luma: same layout on both sides, only the padding goes away and every
    // sample is shifted.
    let luma_row_bytes = width * 2;
    let src_luma_rows = src[offsets[0]..]
        .chunks(strides[0])
        .map(|row| &row[..luma_row_bytes]);
    let dst_luma_rows = dst.chunks_mut(luma_row_bytes);

    for (src_row, dst_row) in src_luma_rows.zip(dst_luma_rows).take(height) {
        for (from, to) in src_row.chunks(2).zip(dst_row.chunks_mut(2)) {
            shift_sample(from, to, shift);
        }
    }

    let luma_plane_bytes = width * 2 * height;

    // Chroma dimensions are computed from the width and height rounded up to
    // the next even number. There is one U and one V sample per 2x2 block, so
    // a destination chroma row holds `chroma_width / 2` two-byte samples, that
    // is `chroma_width` bytes, and there are half as many rows as in luma.
    let chroma_width = width + width % 2;
    let chroma_rows = (height + height % 2) / 2;
    let chroma_plane_bytes = chroma_width * chroma_rows;

    // Chroma: shift like luma, and split the interleaved samples into one
    // plane per component.
    let src_chroma_rows = src[offsets[1]..]
        .chunks(strides[1])
        .map(|row| &row[..chroma_width * 2]);
    let (u_plane, v_plane) = dst[luma_plane_bytes..].split_at_mut(chroma_plane_bytes);
    let dst_chroma_rows = u_plane
        .chunks_mut(chroma_width)
        .zip(v_plane.chunks_mut(chroma_width));

    for (src_row, (u_row, v_row)) in src_chroma_rows.zip(dst_chroma_rows).take(chroma_rows) {
        let outputs = u_row.chunks_mut(2).zip(v_row.chunks_mut(2));
        for (uv, (u, v)) in src_row.chunks(4).zip(outputs) {
            shift_sample(&uv[0..2], u, shift);
            shift_sample(&uv[2..4], v, shift);
        }
    }
}
| |
/// Copies `src` into `dst` as I21x, removing all padding and changing the
/// layout from packed to triplanar.
///
/// `src` holds packed macropixels of four little-endian 16-bit words, read
/// here as `Y0 U Y1 V`, each macropixel covering two horizontally adjacent
/// pixels. `dst` receives a Y plane of `width * height` samples followed by U
/// and V planes of `ceil(width / 2) * height` samples each, all 16-bit
/// little-endian.
///
/// `useful_pixels` is the number of meaningful bits in each sample, e.g. `10`
/// for `Y210` or `16` for `Y216`.
///
/// This function is VAAPI-specific because of the unusual way the source
/// samples are laid out: VAAPI stores the meaningful bits in the MSBs, whereas
/// software generally expects them in the LSBs.
///
/// Odd widths are supported: the last macropixel of each row then contributes
/// a single luma sample. An image with a zero width or height is a no-op.
///
/// # Panics
///
/// Panics if `useful_pixels` exceeds 16, if `strides[0]` is zero, or if a
/// source row is shorter than `ceil(width / 2) * 8` bytes.
///
/// WARNING: this function could not be tested for lack of supporting hardware.
fn y21x_to_i21x(
    src: &[u8],
    dst: &mut [u8],
    useful_pixels: usize,
    width: usize,
    height: usize,
    strides: [usize; 3],
    offsets: [usize; 3],
) {
    // Nothing to copy, and a zero chunk size would panic below.
    if width == 0 || height == 0 {
        return;
    }

    let sample_shift = 16 - useful_pixels;
    // Number of macropixels, and hence of U and of V samples, per row. Rounded
    // up so that the last column of an odd-width image is still covered.
    let uv_width = (width + 1) / 2;

    // Each macropixel is 4 16-bit words, i.e. 8 bytes. Slicing by macropixel
    // count (rather than `width * 4`) keeps every chunk below 8 bytes long,
    // including for odd widths.
    let src_lines = src[offsets[0]..]
        .chunks(strides[0])
        .map(|line| &line[..uv_width * 8]);

    let dst_y_size = width * 2 * height;
    let dst_u_size = uv_width * 2 * height;

    let (dst_y_plane, dst_uv_planes) = dst.split_at_mut(dst_y_size);
    let (dst_u_plane, dst_v_plane) = dst_uv_planes.split_at_mut(dst_u_size);
    let dst_y_lines = dst_y_plane.chunks_mut(width * 2);
    let dst_u_lines = dst_u_plane.chunks_mut(uv_width * 2);
    let dst_v_lines = dst_v_plane.chunks_mut(uv_width * 2);

    for (src_line, (dst_y_line, (dst_u_line, dst_v_line))) in src_lines
        .zip(dst_y_lines.zip(dst_u_lines.zip(dst_v_lines)))
        .take(height)
    {
        for (src, (dst_y, (dst_u, dst_v))) in src_line.chunks_exact(8).zip(
            dst_y_line
                .chunks_mut(4)
                .zip(dst_u_line.chunks_mut(2).zip(dst_v_line.chunks_mut(2))),
        ) {
            // Reads the `index`-th 16-bit word of the macropixel and moves its
            // payload to the LSBs.
            let sample = |index: usize| {
                u16::from_le_bytes([src[2 * index], src[2 * index + 1]]) >> sample_shift
            };

            dst_y[..2].copy_from_slice(&sample(0).to_le_bytes());
            // With an odd width, the last luma chunk of the row has room for
            // one sample only: the second one falls outside the image.
            if let Some(dst_y1) = dst_y.get_mut(2..4) {
                dst_y1.copy_from_slice(&sample(2).to_le_bytes());
            }
            dst_u[..2].copy_from_slice(&sample(1).to_le_bytes());
            dst_v[..2].copy_from_slice(&sample(3).to_le_bytes());
        }
    }
}
| |
/// Converts the packed `src` into the triplanar I412 `dst`, dropping all
/// padding as well as the alpha channel.
///
/// Each source pixel is 8 bytes, read here as four little-endian 16-bit words
/// in `U Y V A` order.
///
/// This function is VAAPI-specific because every sample needs to be rotated
/// right by 4 bits on the way.
fn y412_to_i412(
    src: &[u8],
    dst: &mut [u8],
    width: usize,
    height: usize,
    strides: [usize; 3],
    offsets: [usize; 3],
) {
    /// Reads a little-endian sample from `from`, rotates it right by 4 bits
    /// and writes it, little-endian, to `to`.
    // It is unclear why a rotation is what the data needs.
    fn roll_sample(from: &[u8], to: &mut [u8]) {
        let sample = u16::from_le_bytes([from[0], from[1]]);
        to[..2].copy_from_slice(&sample.rotate_right(4).to_le_bytes());
    }

    let src_rows = src[offsets[0]..]
        .chunks(strides[0])
        .map(|row| &row[..width * 8]);

    // No chroma subsampling: the three planes have the same dimensions.
    let row_bytes = width * 2;
    let plane_bytes = width * 2 * height;

    let (y_plane, chroma_planes) = dst.split_at_mut(plane_bytes);
    let (u_plane, v_plane) = chroma_planes.split_at_mut(plane_bytes);
    let dst_rows = y_plane.chunks_mut(row_bytes).zip(
        u_plane
            .chunks_mut(row_bytes)
            .zip(v_plane.chunks_mut(row_bytes)),
    );

    for (src_row, (y_row, (u_row, v_row))) in src_rows.zip(dst_rows).take(height) {
        let outputs = y_row
            .chunks_mut(2)
            .zip(u_row.chunks_mut(2).zip(v_row.chunks_mut(2)));

        for (pixel, (y, (u, v))) in src_row.chunks(8).zip(outputs) {
            roll_sample(&pixel[2..4], y);
            roll_sample(&pixel[0..2], u);
            roll_sample(&pixel[4..6], v);
        }
    }
}