| /* |
| * Copyright © Microsoft Corporation |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| * IN THE SOFTWARE. |
| */ |
| |
| #include "d3d12_context.h" |
| #include "d3d12_format.h" |
| #include "d3d12_resource.h" |
| #include "d3d12_screen.h" |
| #include "d3d12_surface.h" |
| #include "d3d12_video_dec.h" |
| #if VIDEO_CODEC_H264DEC |
| #include "d3d12_video_dec_h264.h" |
| #endif |
| #if VIDEO_CODEC_H265DEC |
| #include "d3d12_video_dec_hevc.h" |
| #endif |
| #if VIDEO_CODEC_AV1DEC |
| #include "d3d12_video_dec_av1.h" |
| #endif |
| #if VIDEO_CODEC_VP9DEC |
| #include "d3d12_video_dec_vp9.h" |
| #endif |
| #include "d3d12_video_buffer.h" |
| #include "d3d12_residency.h" |
| |
| #include "vl/vl_video_buffer.h" |
| #include "util/format/u_format.h" |
| #include "util/u_inlines.h" |
| #include "util/u_memory.h" |
| #include "util/u_video.h" |
| |
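| /** |
|  * Returns the in-flight resources pool slot to use for the current frame. |
|  * Slots are recycled round-robin over the fence value: e.g. (hypothetical value) with |
|  * D3D12_VIDEO_DEC_ASYNC_DEPTH == 4, fence values 0,1,2,3,4,5 map to slots 0,1,2,3,0,1. |
|  */ |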
| size_t |
| d3d12_video_decoder_pool_current_index(struct d3d12_video_decoder *pD3D12Dec) |
| { |
| return static_cast<size_t>(pD3D12Dec->m_fenceValue % D3D12_VIDEO_DEC_ASYNC_DEPTH); |
| } |
| |
| struct pipe_video_codec * |
| d3d12_video_create_decoder(struct pipe_context *context, const struct pipe_video_codec *codec) |
| { |
| /// |
| /// Initialize d3d12_video_decoder |
| /// |
| |
| |
| // Allocate with new so the constructor runs; otherwise the in-class member initializers would be lost |
| struct d3d12_video_decoder *pD3D12Dec = new d3d12_video_decoder; |
| |
| pD3D12Dec->m_inflightResourcesPool.resize(D3D12_VIDEO_DEC_ASYNC_DEPTH); |
| |
| pD3D12Dec->base = *codec; |
| pD3D12Dec->m_screen = context->screen; |
| |
| pD3D12Dec->base.context = context; |
| pD3D12Dec->base.width = codec->width; |
| pD3D12Dec->base.height = codec->height; |
| // Only fill the methods supported by the d3d12 decoder, leaving the rest null (i.e. encode_* / decode_macroblock |
| // / get_feedback for encode) |
| pD3D12Dec->base.destroy = d3d12_video_decoder_destroy; |
| pD3D12Dec->base.begin_frame = d3d12_video_decoder_begin_frame; |
| pD3D12Dec->base.decode_bitstream = d3d12_video_decoder_decode_bitstream; |
| pD3D12Dec->base.end_frame = d3d12_video_decoder_end_frame; |
| pD3D12Dec->base.flush = d3d12_video_decoder_flush; |
| pD3D12Dec->base.fence_wait = d3d12_video_decoder_fence_wait; |
| pD3D12Dec->base.destroy_fence = d3d12_video_destroy_fence; |
| |
| pD3D12Dec->m_decodeFormat = d3d12_convert_pipe_video_profile_to_dxgi_format(codec->profile); |
| pD3D12Dec->m_d3d12DecProfileType = d3d12_video_decoder_convert_pipe_video_profile_to_profile_type(codec->profile); |
| pD3D12Dec->m_d3d12DecProfile = d3d12_video_decoder_convert_pipe_video_profile_to_d3d12_profile(codec->profile); |
| |
| /// |
| /// Try initializing D3D12 Video device and check for device caps |
| /// |
| |
| struct d3d12_context *pD3D12Ctx = (struct d3d12_context *) context; |
| pD3D12Dec->m_pD3D12Screen = d3d12_screen(pD3D12Ctx->base.screen); |
| |
| /// |
| /// Create decode objects |
| /// |
| HRESULT hr = S_OK; |
| if (FAILED(pD3D12Dec->m_pD3D12Screen->dev->QueryInterface( |
| IID_PPV_ARGS(pD3D12Dec->m_spD3D12VideoDevice.GetAddressOf())))) { |
| debug_printf("[d3d12_video_decoder] d3d12_video_create_decoder - D3D12 Device has no Video support\n"); |
| goto failed; |
| } |
| |
| if (!d3d12_video_decoder_check_caps_and_create_decoder(pD3D12Dec->m_pD3D12Screen, pD3D12Dec)) { |
| debug_printf("[d3d12_video_decoder] d3d12_video_create_decoder - Failure on " |
| "d3d12_video_decoder_check_caps_and_create_decoder\n"); |
| goto failed; |
| } |
| |
| if (!d3d12_video_decoder_create_command_objects(pD3D12Dec->m_pD3D12Screen, pD3D12Dec)) { |
| debug_printf( |
| "[d3d12_video_decoder] d3d12_video_create_decoder - Failure on d3d12_video_decoder_create_command_objects\n"); |
| goto failed; |
| } |
| |
| if (!d3d12_video_decoder_create_video_state_buffers(pD3D12Dec->m_pD3D12Screen, pD3D12Dec)) { |
| debug_printf("[d3d12_video_decoder] d3d12_video_create_decoder - Failure on " |
| "d3d12_video_decoder_create_video_state_buffers\n"); |
| goto failed; |
| } |
| |
| pD3D12Dec->m_decodeFormatInfo = { pD3D12Dec->m_decodeFormat }; |
| hr = pD3D12Dec->m_pD3D12Screen->dev->CheckFeatureSupport(D3D12_FEATURE_FORMAT_INFO, |
| &pD3D12Dec->m_decodeFormatInfo, |
| sizeof(pD3D12Dec->m_decodeFormatInfo)); |
| if (FAILED(hr)) { |
| debug_printf("CheckFeatureSupport failed with HR %x\n", hr); |
| goto failed; |
| } |
| |
| return &pD3D12Dec->base; |
| |
| failed: |
| if (pD3D12Dec != nullptr) { |
| d3d12_video_decoder_destroy((struct pipe_video_codec *) pD3D12Dec); |
| } |
| |
| return nullptr; |
| } |
| |
| /** |
|  * Destroys a d3d12_video_decoder |
|  * Calls destroy_XX for the applicable nested member types XX before deallocating. |
|  * Destroy methods must check their input target argument for != nullptr, as this method can be called as part of |
|  * cleanup from a failure in the creation method. |
|  */ |
| void |
| d3d12_video_decoder_destroy(struct pipe_video_codec *codec) |
| { |
| if (codec == nullptr) { |
| return; |
| } |
| |
| struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec; |
| // Flush and wait for completion of any in-flight GPU work before destroying objects |
| d3d12_video_decoder_flush(codec); |
| if (pD3D12Dec->m_fenceValue > 1 /* Check we submitted at least one frame */) { |
| d3d12_video_decoder_sync_completion(codec, (pD3D12Dec->m_fenceValue - 1u) % D3D12_VIDEO_DEC_ASYNC_DEPTH, OS_TIMEOUT_INFINITE); |
| struct pipe_fence_handle *context_queue_completion_fence = NULL; |
| pD3D12Dec->base.context->flush(pD3D12Dec->base.context, &context_queue_completion_fence, PIPE_FLUSH_ASYNC | PIPE_FLUSH_HINT_FINISH); |
| pD3D12Dec->m_pD3D12Screen->base.fence_finish(&pD3D12Dec->m_pD3D12Screen->base, NULL, context_queue_completion_fence, OS_TIMEOUT_INFINITE); |
| pD3D12Dec->m_pD3D12Screen->base.fence_reference(&pD3D12Dec->m_pD3D12Screen->base, &context_queue_completion_fence, NULL); |
| } |
| |
| // No need for d3d12_destroy_video_objects |
| // All the objects created here are smart pointer members of d3d12_video_decoder |
| // No need for d3d12_destroy_video_decoder_and_heap |
| // All the objects created here are smart pointer members of d3d12_video_decoder |
| // No need for d3d12_destroy_video_dpbmanagers |
| // All the objects created here are smart pointer members of d3d12_video_decoder |
| |
| // No need for m_pD3D12Screen as it is not managed by d3d12_video_decoder |
| |
| // Call dtor to make ComPtr work |
| delete pD3D12Dec; |
| } |
| |
| /** |
| * start decoding of a new frame |
| */ |
| void |
| d3d12_video_decoder_begin_frame(struct pipe_video_codec *codec, |
| struct pipe_video_buffer *target, |
| struct pipe_picture_desc *picture) |
| { |
|    // Do nothing here. Initialization happens on decoder creation; re-configuration (if any) happens in |
|    // d3d12_video_decoder_decode_bitstream |
| struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec; |
| assert(pD3D12Dec); |
| |
| /// |
|    /// Wait here to make sure the next in-flight resource set is free before reusing it |
| /// |
| if (pD3D12Dec->m_fenceValue >= D3D12_VIDEO_DEC_ASYNC_DEPTH) { |
| debug_printf("[d3d12_video_decoder] d3d12_video_decoder_begin_frame Waiting for completion of in flight resource " |
| "sets with previous work\n"); |
| ASSERTED bool wait_res = |
| d3d12_video_decoder_sync_completion(codec, pD3D12Dec->m_fenceValue % D3D12_VIDEO_DEC_ASYNC_DEPTH, OS_TIMEOUT_INFINITE); |
| assert(wait_res); |
| } |
| |
| HRESULT hr = pD3D12Dec->m_spDecodeCommandList->Reset( |
| pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_spCommandAllocator.Get()); |
| if (FAILED(hr)) { |
| debug_printf("[d3d12_video_decoder] resetting ID3D12GraphicsCommandList failed with HR %x\n", hr); |
| assert(false); |
| } |
| |
| debug_printf("[d3d12_video_decoder] d3d12_video_decoder_begin_frame finalized for fenceValue: %d\n", |
| pD3D12Dec->m_fenceValue); |
| } |
| |
| /** |
| * decode a bitstream |
| */ |
| void |
| d3d12_video_decoder_decode_bitstream(struct pipe_video_codec *codec, |
| struct pipe_video_buffer *target, |
| struct pipe_picture_desc *picture, |
| unsigned num_buffers, |
| const void *const *buffers, |
| const unsigned *sizes) |
| { |
| struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec; |
| assert(pD3D12Dec); |
| debug_printf("[d3d12_video_decoder] d3d12_video_decoder_decode_bitstream started for fenceValue: %d\n", |
| pD3D12Dec->m_fenceValue); |
| assert(pD3D12Dec->m_spD3D12VideoDevice); |
| assert(pD3D12Dec->m_spDecodeCommandQueue); |
| assert(pD3D12Dec->m_pD3D12Screen); |
| ASSERTED struct d3d12_video_buffer *pD3D12VideoBuffer = (struct d3d12_video_buffer *) target; |
| assert(pD3D12VideoBuffer); |
| |
| /// |
| /// Compressed bitstream buffers |
| /// |
| |
|    /// Mesa VA frontend video buffer passing semantics for H264, HEVC, MPEG4, VC1 and PIPE_VIDEO_PROFILE_VC1_ADVANCED |
|    /// are: |
|    ///    If num_buffers == 1 -> buf[0] has the compressed bitstream WITH the start code. |
|    ///    If num_buffers == 2 -> buf[0] has the NALU start code and buf[1] has the compressed bitstream WITHOUT any |
|    ///    start code. |
|    ///    If num_buffers == 3 -> It's JPEG, which is not supported in D3D12. num_buffers is at most 3. |
|    /// The Mesa VDPAU frontend passes the buffers as they get passed in VdpDecoderRender, without fixing any start |
|    /// codes except for PIPE_VIDEO_PROFILE_VC1_ADVANCED. |
|    // https://http.download.nvidia.com/XFree86/vdpau/doxygen/html/index.html#video_mixer_usage mentions that |
|    // it is recommended that applications pass solely the slice data to VDPAU; specifically, that any header data |
|    // structures be excluded from the portion of the bitstream passed to VDPAU, and that for all codecs/profiles it's |
|    // highly recommended (when the codec/profile has such codes...) that the start codes are passed to VDPAU, even |
|    // when not included in the bitstream the VDPAU client is parsing. Let's assume we get all the start codes for |
|    // VDPAU. The doc also says "VDPAU implementations must operate correctly if non-slice data is included, at least |
|    // for formats employing start codes to delimit slice data", so if we ever hit an issue with VDPAU start codes we |
|    // should consider handling it in the VDPAU layer above the gallium driver, like mesa VA does. |
| |
|    // The multi-slice case is handled by end_frame, which parses the start codes from the combined bitstream of all |
|    // decode_bitstream calls. |
| |
|    // VAAPI seems to send one decode_bitstream command per slice, but we should also support the VDPAU case where the |
|    // buffers array has multiple entries per call: {startCode (optional), slice1, slice2, ..., startCode (optional), |
|    // sliceN} |
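|    // For example, with a hypothetical VDPAU input of buffers = {startCode (3 bytes), slice1, startCode (4 bytes), |
|    // slice2} and num_buffers == 4, the loop below issues two delegated calls of two buffers each, {startCode, slice1} |
|    // and {startCode, slice2}, based on the "start codes are at most 4 bytes" heuristic. |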
| |
| if (num_buffers > 2) // Assume this means multiple slices at once in a decode_bitstream call |
| { |
|       // Based on the VA frontend codebase, this never happens for video (no JPEG) |
|       // Based on the VDPAU frontend codebase, this only happens when sending more than one slice at once in a |
|       // decode_bitstream call |
| |
|       // To handle the case where VDPAU sends all the slices at once in a single decode_bitstream call, pretend it |
|       // was a series of separate calls |
| |
|       // Group by start codes and buffers and perform one delegated call per slice |
| debug_printf("[d3d12_video_decoder] d3d12_video_decoder_decode_bitstream multiple slices on same call detected " |
| "for fenceValue: %d, breaking down the calls into one per slice\n", |
| pD3D12Dec->m_fenceValue); |
| |
| size_t curBufferIdx = 0; |
| |
| // Vars to be used for the delegation calls to decode_bitstream |
| unsigned call_num_buffers = 0; |
| const void *const *call_buffers = nullptr; |
| const unsigned *call_sizes = nullptr; |
| |
| while (curBufferIdx < num_buffers) { |
| // Store the current buffer as the base array pointer for the delegated call, later decide if it'll be a |
| // startcode+slicedata or just slicedata call |
| call_buffers = &buffers[curBufferIdx]; |
| call_sizes = &sizes[curBufferIdx]; |
| |
|          // Start codes are usually at most 4 bytes long |
| // If the current buffer is a start code buffer, send it along with the next buffer. Otherwise, just send the |
| // current buffer. |
| call_num_buffers = (sizes[curBufferIdx] <= 4) ? 2 : 1; |
| |
| // Delegate call with one or two buffers only |
| d3d12_video_decoder_decode_bitstream(codec, target, picture, call_num_buffers, call_buffers, call_sizes); |
| |
| curBufferIdx += call_num_buffers; // Consume from the loop the buffers sent in the last call |
| } |
| } else { |
| /// |
| /// Handle single slice buffer path, maybe with an extra start code buffer at buffers[0]. |
| /// |
| |
|       // Both cases (start code in buffers[0] with the slice data in buffers[1], or the full bitstream in buffers[0]) |
|       // can be handled by flattening all the buffers into a single one and passing that to HW. |
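|       // For example, with hypothetical sizes: num_buffers == 2 and sizes = {4, 1000} appends the 4-byte start code |
|       // followed by the 1000 bytes of slice data contiguously into m_stagingDecodeBitstream. |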
| |
| size_t totalReceivedBuffersSize = 0u; // Combined size of all sizes[] |
| for (size_t bufferIdx = 0; bufferIdx < num_buffers; bufferIdx++) { |
| totalReceivedBuffersSize += sizes[bufferIdx]; |
| } |
| |
|       // Bytes of data pre-staged before this decode_bitstream call |
| auto &inFlightResources = pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)]; |
| size_t preStagedDataSize = inFlightResources.m_stagingDecodeBitstream.size(); |
| |
|       // Extend the staging buffer size, as decode_bitstream can be called several times before end_frame |
| inFlightResources.m_stagingDecodeBitstream.resize(preStagedDataSize + totalReceivedBuffersSize); |
| |
| // Point newSliceDataPositionDstBase to the end of the pre-staged data in m_stagingDecodeBitstream, where the new |
| // buffers will be appended |
| uint8_t *newSliceDataPositionDstBase = inFlightResources.m_stagingDecodeBitstream.data() + preStagedDataSize; |
| |
| // Append new data at the end. |
| size_t dstOffset = 0u; |
| for (size_t bufferIdx = 0; bufferIdx < num_buffers; bufferIdx++) { |
| memcpy(newSliceDataPositionDstBase + dstOffset, buffers[bufferIdx], sizes[bufferIdx]); |
| dstOffset += sizes[bufferIdx]; |
| } |
| |
| debug_printf("[d3d12_video_decoder] d3d12_video_decoder_decode_bitstream finalized for fenceValue: %d\n", |
| pD3D12Dec->m_fenceValue); |
| } |
| |
| if (pD3D12Dec->m_d3d12DecProfileType == d3d12_video_decode_profile_type_h264) { |
| struct pipe_h264_picture_desc *h264 = (pipe_h264_picture_desc*) picture; |
| target->interlaced = !h264->pps->sps->frame_mbs_only_flag; |
| } |
| } |
| |
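| /** |
|  * Store the current decode target and the upper layer reference frames for the current picture |
|  */ |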
| void |
| d3d12_video_decoder_store_upper_layer_references(struct d3d12_video_decoder *pD3D12Dec, |
| struct pipe_video_buffer *target, |
| struct pipe_picture_desc *picture) |
| { |
| #if D3D12_VIDEO_ANY_DECODER_ENABLED |
| pD3D12Dec->m_pCurrentDecodeTarget = target; |
| switch (pD3D12Dec->m_d3d12DecProfileType) { |
| #if VIDEO_CODEC_H264DEC |
| case d3d12_video_decode_profile_type_h264: |
| { |
| pipe_h264_picture_desc *pPicControlH264 = (pipe_h264_picture_desc *) picture; |
| pD3D12Dec->m_pCurrentReferenceTargets = pPicControlH264->ref; |
| } break; |
| #endif |
| #if VIDEO_CODEC_H265DEC |
| case d3d12_video_decode_profile_type_hevc: |
| { |
| pipe_h265_picture_desc *pPicControlHevc = (pipe_h265_picture_desc *) picture; |
| pD3D12Dec->m_pCurrentReferenceTargets = pPicControlHevc->ref; |
| } break; |
| #endif |
| #if VIDEO_CODEC_AV1DEC |
| case d3d12_video_decode_profile_type_av1: |
| { |
| pipe_av1_picture_desc *pPicControlAV1 = (pipe_av1_picture_desc *) picture; |
| pD3D12Dec->m_pCurrentReferenceTargets = pPicControlAV1->ref; |
| } break; |
| #endif |
| #if VIDEO_CODEC_VP9DEC |
| case d3d12_video_decode_profile_type_vp9: |
| { |
| pipe_vp9_picture_desc *pPicControlVP9 = (pipe_vp9_picture_desc *) picture; |
| pD3D12Dec->m_pCurrentReferenceTargets = pPicControlVP9->ref; |
| } break; |
| #endif |
| default: |
| { |
| unreachable("Unsupported d3d12_video_decode_profile_type"); |
| } break; |
| } |
| #endif // D3D12_VIDEO_ANY_DECODER_ENABLED |
| } |
| |
| /** |
| * end decoding of the current frame |
| */ |
| int |
| d3d12_video_decoder_end_frame(struct pipe_video_codec *codec, |
| struct pipe_video_buffer *target, |
| struct pipe_picture_desc *picture) |
| { |
| struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec; |
| assert(pD3D12Dec); |
| struct d3d12_screen *pD3D12Screen = (struct d3d12_screen *) pD3D12Dec->m_pD3D12Screen; |
| assert(pD3D12Screen); |
| debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame started for fenceValue: %d\n", |
| pD3D12Dec->m_fenceValue); |
| assert(pD3D12Dec->m_spD3D12VideoDevice); |
| assert(pD3D12Dec->m_spDecodeCommandQueue); |
| struct d3d12_video_buffer *pD3D12VideoBuffer = (struct d3d12_video_buffer *) target; |
| assert(pD3D12VideoBuffer); |
| |
| /// |
| /// Store current decode output target texture and reference textures from upper layer |
| /// |
| d3d12_video_decoder_store_upper_layer_references(pD3D12Dec, target, picture); |
| |
| /// |
| /// Codec header picture parameters buffers |
| /// |
| |
| auto &inFlightResources = pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)]; |
| |
| d3d12_video_decoder_store_converted_dxva_picparams_from_pipe_input(pD3D12Dec, picture, pD3D12VideoBuffer); |
| assert(inFlightResources.m_picParamsBuffer.size() > 0); |
| |
| /// |
| /// Prepare Slice control buffers before clearing staging buffer |
| /// |
| assert(inFlightResources.m_stagingDecodeBitstream.size() > |
| 0); // Make sure the staging wasn't cleared yet in end_frame |
| d3d12_video_decoder_prepare_dxva_slices_control(pD3D12Dec, picture); |
| assert(inFlightResources.m_SliceControlBuffer.size() > 0); |
| |
| /// |
| /// Upload m_stagingDecodeBitstream to GPU memory now that end_frame is called and clear staging buffer |
| /// |
| |
| uint64_t sliceDataStagingBufferSize = inFlightResources.m_stagingDecodeBitstream.size(); |
| uint8_t *sliceDataStagingBufferPtr = inFlightResources.m_stagingDecodeBitstream.data(); |
| |
|    // Reallocate if necessary to accommodate the current frame bitstream buffer in GPU memory |
| if (inFlightResources.m_curFrameCompressedBitstreamBufferAllocatedSize < sliceDataStagingBufferSize) { |
| if (!d3d12_video_decoder_create_staging_bitstream_buffer(pD3D12Screen, pD3D12Dec, sliceDataStagingBufferSize)) { |
| debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame - Failure on " |
| "d3d12_video_decoder_create_staging_bitstream_buffer\n"); |
| debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame failed for fenceValue: %d\n", |
| pD3D12Dec->m_fenceValue); |
| assert(false); |
| return 1; |
| } |
| } |
| |
| // Upload frame bitstream CPU data to ID3D12Resource buffer |
| inFlightResources.m_curFrameCompressedBitstreamBufferPayloadSize = |
| sliceDataStagingBufferSize; // This can be less than m_curFrameCompressedBitstreamBufferAllocatedSize. |
| assert(inFlightResources.m_curFrameCompressedBitstreamBufferPayloadSize <= |
| inFlightResources.m_curFrameCompressedBitstreamBufferAllocatedSize); |
| |
| /* One-shot transfer operation with data supplied in a user |
| * pointer. |
| */ |
| inFlightResources.pPipeCompressedBufferObj = |
| d3d12_resource_from_resource(&pD3D12Screen->base, inFlightResources.m_curFrameCompressedBitstreamBuffer.Get()); |
| assert(inFlightResources.pPipeCompressedBufferObj); |
| pD3D12Dec->base.context->buffer_subdata(pD3D12Dec->base.context, // context |
| inFlightResources.pPipeCompressedBufferObj, // dst buffer |
| PIPE_MAP_WRITE, // usage PIPE_MAP_x |
| 0, // offset |
| static_cast<unsigned int>(sizeof(*sliceDataStagingBufferPtr) * sliceDataStagingBufferSize), // size |
| sliceDataStagingBufferPtr // data |
| ); |
| |
| // Flush buffer_subdata batch |
| // before deleting the source CPU buffer below |
| |
| pD3D12Dec->base.context->flush(pD3D12Dec->base.context, |
| &inFlightResources.m_pBitstreamUploadGPUCompletionFence, |
| PIPE_FLUSH_ASYNC | PIPE_FLUSH_HINT_FINISH); |
| assert(inFlightResources.m_pBitstreamUploadGPUCompletionFence); |
| // To be waited on GPU fence before flushing current frame DecodeFrame to GPU |
| |
| /// |
| /// Proceed to record the GPU Decode commands |
| /// |
| |
|    // Conversions requested by the caller upper layer (none for now) |
| d3d12_video_decode_output_conversion_arguments requestedConversionArguments = {}; |
| |
| /// |
| /// Record DecodeFrame operation and resource state transitions. |
| /// |
| |
| // Translate input D3D12 structure |
| D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS d3d12InputArguments = {}; |
| |
| d3d12InputArguments.CompressedBitstream.pBuffer = inFlightResources.m_curFrameCompressedBitstreamBuffer.Get(); |
| d3d12InputArguments.CompressedBitstream.Offset = 0u; |
| ASSERTED constexpr uint64_t d3d12BitstreamOffsetAlignment = |
| 128u; // specified in |
| // https://docs.microsoft.com/en-us/windows/win32/api/d3d12video/ne-d3d12video-d3d12_video_decode_tier |
| assert((d3d12InputArguments.CompressedBitstream.Offset == 0) || |
| ((d3d12InputArguments.CompressedBitstream.Offset % d3d12BitstreamOffsetAlignment) == 0)); |
| d3d12InputArguments.CompressedBitstream.Size = inFlightResources.m_curFrameCompressedBitstreamBufferPayloadSize; |
| |
| D3D12_RESOURCE_BARRIER resourceBarrierCommonToDecode[1] = { |
| CD3DX12_RESOURCE_BARRIER::Transition(d3d12InputArguments.CompressedBitstream.pBuffer, |
| D3D12_RESOURCE_STATE_COMMON, |
| D3D12_RESOURCE_STATE_VIDEO_DECODE_READ), |
| }; |
| pD3D12Dec->m_spDecodeCommandList->ResourceBarrier(1u, resourceBarrierCommonToDecode); |
| |
| // Schedule reverse (back to common) transitions before command list closes for current frame |
| pD3D12Dec->m_transitionsBeforeCloseCmdList.push_back( |
| CD3DX12_RESOURCE_BARRIER::Transition(d3d12InputArguments.CompressedBitstream.pBuffer, |
| D3D12_RESOURCE_STATE_VIDEO_DECODE_READ, |
| D3D12_RESOURCE_STATE_COMMON)); |
| |
| /// |
|    /// Plain texture (no reference-only flags in the resource allocation) to use as decode output and send downstream |
|    /// for display/consumption |
| /// |
| ID3D12Resource *pOutputD3D12Texture; |
| uint outputD3D12Subresource = 0; |
| |
| /// |
|    /// Reference-only texture (with reference-only flags in the resource allocation) to use as the reconstructed |
|    /// picture decode output and to store as a future reference in the DPB |
| /// |
| ID3D12Resource *pRefOnlyOutputD3D12Texture; |
| uint refOnlyOutputD3D12Subresource = 0; |
| |
| if (!d3d12_video_decoder_prepare_for_decode_frame(pD3D12Dec, |
| target, |
| pD3D12VideoBuffer, |
| &pOutputD3D12Texture, // output |
| &outputD3D12Subresource, // output |
| &pRefOnlyOutputD3D12Texture, // output |
| &refOnlyOutputD3D12Subresource, // output |
| requestedConversionArguments)) { |
| debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame - Failure on " |
| "d3d12_video_decoder_prepare_for_decode_frame\n"); |
| debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame failed for fenceValue: %d\n", |
| pD3D12Dec->m_fenceValue); |
| assert(false); |
| return 1; |
| } |
| |
| /// |
| /// Set codec picture parameters CPU buffer |
| /// |
| |
| d3d12InputArguments.NumFrameArguments = |
| 1u; // Only the codec data received from the above layer with picture params |
| d3d12InputArguments.FrameArguments[d3d12InputArguments.NumFrameArguments - 1] = { |
| D3D12_VIDEO_DECODE_ARGUMENT_TYPE_PICTURE_PARAMETERS, |
| static_cast<uint32_t>(inFlightResources.m_picParamsBuffer.size()), |
| inFlightResources.m_picParamsBuffer.data(), |
| }; |
| |
| if (inFlightResources.m_SliceControlBuffer.size() > 0) { |
| d3d12InputArguments.NumFrameArguments++; |
| d3d12InputArguments.FrameArguments[d3d12InputArguments.NumFrameArguments - 1] = { |
| D3D12_VIDEO_DECODE_ARGUMENT_TYPE_SLICE_CONTROL, |
| static_cast<uint32_t>(inFlightResources.m_SliceControlBuffer.size()), |
| inFlightResources.m_SliceControlBuffer.data(), |
| }; |
| } |
| |
| if (inFlightResources.qp_matrix_frame_argument_enabled && |
| (inFlightResources.m_InverseQuantMatrixBuffer.size() > 0)) { |
| d3d12InputArguments.NumFrameArguments++; |
| d3d12InputArguments.FrameArguments[d3d12InputArguments.NumFrameArguments - 1] = { |
| D3D12_VIDEO_DECODE_ARGUMENT_TYPE_INVERSE_QUANTIZATION_MATRIX, |
| static_cast<uint32_t>(inFlightResources.m_InverseQuantMatrixBuffer.size()), |
| inFlightResources.m_InverseQuantMatrixBuffer.data(), |
| }; |
| } |
| |
| d3d12InputArguments.ReferenceFrames = pD3D12Dec->m_spDPBManager->get_current_reference_frames(); |
| if (D3D12_DEBUG_VERBOSE & d3d12_debug) { |
| pD3D12Dec->m_spDPBManager->print_dpb(); |
| } |
| |
| d3d12InputArguments.pHeap = pD3D12Dec->m_spVideoDecoderHeap.Get(); |
| |
| // translate output D3D12 structure |
| D3D12_VIDEO_DECODE_OUTPUT_STREAM_ARGUMENTS1 d3d12OutputArguments = {}; |
| d3d12OutputArguments.pOutputTexture2D = pOutputD3D12Texture; |
| d3d12OutputArguments.OutputSubresource = outputD3D12Subresource; |
| |
| bool fReferenceOnly = (pD3D12Dec->m_ConfigDecoderSpecificFlags & |
| d3d12_video_decode_config_specific_flag_reference_only_textures_required) != 0; |
| if (fReferenceOnly) { |
| d3d12OutputArguments.ConversionArguments.Enable = true; |
| |
| assert(pRefOnlyOutputD3D12Texture); |
| d3d12OutputArguments.ConversionArguments.pReferenceTexture2D = pRefOnlyOutputD3D12Texture; |
| d3d12OutputArguments.ConversionArguments.ReferenceSubresource = refOnlyOutputD3D12Subresource; |
| |
| const D3D12_RESOURCE_DESC &descReference = GetDesc(d3d12OutputArguments.ConversionArguments.pReferenceTexture2D); |
| d3d12OutputArguments.ConversionArguments.DecodeColorSpace = d3d12_convert_from_legacy_color_space( |
| !util_format_is_yuv(d3d12_get_pipe_format(descReference.Format)), |
| util_format_get_blocksize(d3d12_get_pipe_format(descReference.Format)) * 8 /*bytes to bits conversion*/, |
| /* StudioRGB= */ false, |
| /* P709= */ true, |
| /* StudioYUV= */ true); |
| |
| const D3D12_RESOURCE_DESC &descOutput = GetDesc(d3d12OutputArguments.pOutputTexture2D); |
| d3d12OutputArguments.ConversionArguments.OutputColorSpace = d3d12_convert_from_legacy_color_space( |
| !util_format_is_yuv(d3d12_get_pipe_format(descOutput.Format)), |
| util_format_get_blocksize(d3d12_get_pipe_format(descOutput.Format)) * 8 /*bytes to bits conversion*/, |
| /* StudioRGB= */ false, |
| /* P709= */ true, |
| /* StudioYUV= */ true); |
| |
| const D3D12_VIDEO_DECODER_HEAP_DESC &HeapDesc = GetDesc(pD3D12Dec->m_spVideoDecoderHeap.Get()); |
| d3d12OutputArguments.ConversionArguments.OutputWidth = HeapDesc.DecodeWidth; |
| d3d12OutputArguments.ConversionArguments.OutputHeight = HeapDesc.DecodeHeight; |
| } else { |
| d3d12OutputArguments.ConversionArguments.Enable = false; |
| } |
| |
| CD3DX12_RESOURCE_DESC outputDesc(GetDesc(d3d12OutputArguments.pOutputTexture2D)); |
| uint32_t MipLevel, PlaneSlice, ArraySlice; |
| D3D12DecomposeSubresource(d3d12OutputArguments.OutputSubresource, |
| outputDesc.MipLevels, |
| outputDesc.ArraySize(), |
| MipLevel, |
| ArraySlice, |
| PlaneSlice); |
| |
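|    // Transition every plane subresource of the decode output texture into the decode-write state. |
|    // For planar formats (e.g. NV12) PlaneCount is 2: the Y plane and the interleaved UV plane. |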
| for (PlaneSlice = 0; PlaneSlice < pD3D12Dec->m_decodeFormatInfo.PlaneCount; PlaneSlice++) { |
| uint planeOutputSubresource = outputDesc.CalcSubresource(MipLevel, ArraySlice, PlaneSlice); |
| |
| D3D12_RESOURCE_BARRIER resourceBarrierCommonToDecode[1] = { |
| CD3DX12_RESOURCE_BARRIER::Transition(d3d12OutputArguments.pOutputTexture2D, |
| D3D12_RESOURCE_STATE_COMMON, |
| D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE, |
| planeOutputSubresource), |
| }; |
| pD3D12Dec->m_spDecodeCommandList->ResourceBarrier(1u, resourceBarrierCommonToDecode); |
| } |
| |
| // Schedule reverse (back to common) transitions before command list closes for current frame |
| for (PlaneSlice = 0; PlaneSlice < pD3D12Dec->m_decodeFormatInfo.PlaneCount; PlaneSlice++) { |
| uint planeOutputSubresource = outputDesc.CalcSubresource(MipLevel, ArraySlice, PlaneSlice); |
| pD3D12Dec->m_transitionsBeforeCloseCmdList.push_back( |
| CD3DX12_RESOURCE_BARRIER::Transition(d3d12OutputArguments.pOutputTexture2D, |
| D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE, |
| D3D12_RESOURCE_STATE_COMMON, |
| planeOutputSubresource)); |
| } |
| |
| // Record DecodeFrame |
| |
| pD3D12Dec->m_spDecodeCommandList->DecodeFrame1(pD3D12Dec->m_spVideoDecoder.Get(), |
| &d3d12OutputArguments, |
| &d3d12InputArguments); |
| |
| debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame finalized for fenceValue: %d\n", |
| pD3D12Dec->m_fenceValue); |
| |
|    // Save extra references to the Decoder, DecoderHeap and DPB allocations in case |
|    // there's a reconfiguration that triggers the construction of new objects |
| inFlightResources.m_spDecoder = pD3D12Dec->m_spVideoDecoder; |
| inFlightResources.m_spDecoderHeap = pD3D12Dec->m_spVideoDecoderHeap; |
| inFlightResources.m_References = pD3D12Dec->m_spDPBManager; |
| |
| /// |
| /// Flush work to the GPU |
| /// |
| pD3D12Dec->m_needsGPUFlush = true; |
| d3d12_video_decoder_flush(codec); |
|    // The call to d3d12_video_decoder_flush increases m_fenceValue |
| size_t inflightIndexBeforeFlush = static_cast<size_t>(pD3D12Dec->m_fenceValue - 1u) % D3D12_VIDEO_DEC_ASYNC_DEPTH; |
| |
| if (pD3D12Dec->m_spDPBManager->is_pipe_buffer_underlying_output_decode_allocation()) { |
| // No need to copy, the output surface fence is merely the decode queue fence |
| if (picture->out_fence) |
| d3d12_fence_reference((struct d3d12_fence **)picture->out_fence, pD3D12Dec->m_inflightResourcesPool[inflightIndexBeforeFlush].m_fence.get()); |
| } else { |
| /// |
| /// If !pD3D12Dec->m_spDPBManager->is_pipe_buffer_underlying_output_decode_allocation() |
| /// We cannot use the standalone video buffer allocation directly and we must use instead |
| /// either a ID3D12Resource with DECODE_REFERENCE only flag or a texture array within the same |
| /// allocation |
| /// Do GPU->GPU texture copy from decode output to pipe target decode texture sampler view planes |
| /// |
| |
| // Get destination resource |
| struct pipe_sampler_view **pPipeDstViews = target->get_sampler_view_planes(target); |
| |
| // Get source pipe_resource |
| pipe_resource *pPipeSrc = |
| d3d12_resource_from_resource(&pD3D12Screen->base, d3d12OutputArguments.pOutputTexture2D); |
| assert(pPipeSrc); |
| |
|       // Make the graphics context (which will perform the copy) GPU-wait until the decode finishes |
| pD3D12Dec->base.context->fence_server_sync(pD3D12Dec->base.context, |
| (struct pipe_fence_handle *)pD3D12Dec->m_inflightResourcesPool[inflightIndexBeforeFlush].m_fence.get(), 0); |
| |
| // Copy all format subresources/texture planes |
| for (PlaneSlice = 0; PlaneSlice < pD3D12Dec->m_decodeFormatInfo.PlaneCount; PlaneSlice++) { |
| assert(d3d12OutputArguments.OutputSubresource < INT16_MAX); |
| struct pipe_box box; |
| u_box_3d(0, |
| 0, |
| // src array slice, taken as Z for TEXTURE_2D_ARRAY |
| static_cast<int16_t>(d3d12OutputArguments.OutputSubresource), |
| static_cast<int>(pPipeDstViews[PlaneSlice]->texture->width0), |
| static_cast<int16_t>(pPipeDstViews[PlaneSlice]->texture->height0), |
| 1, &box); |
| |
| pD3D12Dec->base.context->resource_copy_region(pD3D12Dec->base.context, |
| pPipeDstViews[PlaneSlice]->texture, // dst |
| 0, // dst level |
| 0, // dstX |
| 0, // dstY |
| 0, // dstZ |
| (PlaneSlice == 0) ? pPipeSrc : pPipeSrc->next, // src |
| 0, // src level |
| &box); |
| } |
| // Flush resource_copy_region batch |
| // The output surface fence is the graphics queue that will signal after the copy ends |
| pD3D12Dec->base.context->flush(pD3D12Dec->base.context, picture->out_fence, PIPE_FLUSH_ASYNC | PIPE_FLUSH_HINT_FINISH); |
| } |
| return 0; |
| } |
| |
| /** |
|  * Waits on a decoder fence. |
| */ |
| int |
| d3d12_video_decoder_fence_wait(struct pipe_video_codec *codec, struct pipe_fence_handle *fence, uint64_t timeout) |
| { |
| struct d3d12_fence *fenceValueToWaitOn = (struct d3d12_fence *) fence; |
| assert(fenceValueToWaitOn); |
| |
| bool wait_res = d3d12_fence_finish(fenceValueToWaitOn, timeout); |
| if (wait_res) { |
| // Opportunistically reset batches |
| for (uint32_t i = 0; i < D3D12_VIDEO_DEC_ASYNC_DEPTH; ++i) |
| (void)d3d12_video_decoder_sync_completion(codec, i, 0); |
| } |
| |
| // Return semantics based on p_video_codec interface |
| // ret == 0 -> Decode in progress |
| // ret != 0 -> Decode completed |
| return wait_res ? 1 : 0; |
| } |
| |
| /** |
|  * Flush any outstanding command buffers to the hardware. |
|  * Should be called before a video_buffer is accessed by the gallium frontend again. |
| */ |
| void |
| d3d12_video_decoder_flush(struct pipe_video_codec *codec) |
| { |
| struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec; |
| assert(pD3D12Dec); |
| assert(pD3D12Dec->m_spD3D12VideoDevice); |
| assert(pD3D12Dec->m_spDecodeCommandQueue); |
| debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush started. Will flush video queue work and CPU wait on " |
| "fenceValue: %d\n", |
| pD3D12Dec->m_fenceValue); |
| |
| if (!pD3D12Dec->m_needsGPUFlush) { |
| debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush started. Nothing to flush, all up to date.\n"); |
| } else { |
| HRESULT hr = pD3D12Dec->m_pD3D12Screen->dev->GetDeviceRemovedReason(); |
| if (hr != S_OK) { |
| debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush" |
| " - D3D12Device was removed BEFORE commandlist " |
| "execution with HR %x.\n", |
| hr); |
| goto flush_fail; |
| } |
| |
| if (pD3D12Dec->m_transitionsBeforeCloseCmdList.size() > 0) { |
| pD3D12Dec->m_spDecodeCommandList->ResourceBarrier(static_cast<UINT>(pD3D12Dec->m_transitionsBeforeCloseCmdList.size()), |
| pD3D12Dec->m_transitionsBeforeCloseCmdList.data()); |
| pD3D12Dec->m_transitionsBeforeCloseCmdList.clear(); |
| } |
| |
| hr = pD3D12Dec->m_spDecodeCommandList->Close(); |
| if (FAILED(hr)) { |
| debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush - Can't close command list with HR %x\n", hr); |
| goto flush_fail; |
| } |
| |
| auto &inFlightResources = pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)]; |
| ID3D12CommandList *ppCommandLists[1] = { pD3D12Dec->m_spDecodeCommandList.Get() }; |
| struct d3d12_fence *pUploadBitstreamFence = d3d12_fence(inFlightResources.m_pBitstreamUploadGPUCompletionFence); |
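|       // Make the decode queue GPU-wait for the bitstream upload to complete, then execute the decode command list |
|       // and signal m_fenceValue on the completion fence when it finishes |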
| pD3D12Dec->m_spDecodeCommandQueue->Wait(pUploadBitstreamFence->cmdqueue_fence, pUploadBitstreamFence->value); |
| pD3D12Dec->m_spDecodeCommandQueue->ExecuteCommandLists(1, ppCommandLists); |
| pD3D12Dec->m_spDecodeCommandQueue->Signal(pD3D12Dec->m_spFence.Get(), pD3D12Dec->m_fenceValue); |
| |
| // Validate device was not removed |
| hr = pD3D12Dec->m_pD3D12Screen->dev->GetDeviceRemovedReason(); |
| if (hr != S_OK) { |
| debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush" |
| " - D3D12Device was removed AFTER commandlist " |
| "execution with HR %x, but wasn't before.\n", |
| hr); |
| goto flush_fail; |
| } |
| |
| // Set async fence info |
| inFlightResources.m_fence.reset(d3d12_create_fence_raw(pD3D12Dec->m_spFence.Get(), pD3D12Dec->m_fenceValue++)); |
| |
| pD3D12Dec->m_needsGPUFlush = false; |
| } |
| return; |
| |
| flush_fail: |
| debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush failed for fenceValue: %d\n", pD3D12Dec->m_fenceValue); |
| assert(false); |
| } |
| |
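| /** |
|  * Creates the decode command queue, the shared completion fence, one command allocator and fence per |
|  * in-flight resource pool slot, and the decode command list (requires ID3D12Device4::CreateCommandList1) |
|  */ |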
| bool |
| d3d12_video_decoder_create_command_objects(const struct d3d12_screen *pD3D12Screen, |
| struct d3d12_video_decoder *pD3D12Dec) |
| { |
| assert(pD3D12Dec->m_spD3D12VideoDevice); |
| |
| D3D12_COMMAND_QUEUE_DESC commandQueueDesc = { D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE }; |
| HRESULT hr = pD3D12Screen->dev->CreateCommandQueue(&commandQueueDesc, |
| IID_PPV_ARGS(pD3D12Dec->m_spDecodeCommandQueue.GetAddressOf())); |
| if (FAILED(hr)) { |
| debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_command_objects - Call to CreateCommandQueue " |
| "failed with HR %x\n", |
| hr); |
| return false; |
| } |
| |
| hr = pD3D12Screen->dev->CreateFence(0, D3D12_FENCE_FLAG_SHARED, IID_PPV_ARGS(&pD3D12Dec->m_spFence)); |
| if (FAILED(hr)) { |
| debug_printf( |
| "[d3d12_video_decoder] d3d12_video_decoder_create_command_objects - Call to CreateFence failed with HR %x\n", |
| hr); |
| return false; |
| } |
| |
| uint64_t CompletionFenceValue = pD3D12Dec->m_fenceValue; |
| for (auto &inputResource : pD3D12Dec->m_inflightResourcesPool) { |
| hr = pD3D12Dec->m_pD3D12Screen->dev->CreateCommandAllocator( |
| D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE, |
| IID_PPV_ARGS(inputResource.m_spCommandAllocator.GetAddressOf())); |
| if (FAILED(hr)) { |
| debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_command_objects - Call to " |
| "CreateCommandAllocator failed with HR %x\n", |
| hr); |
| return false; |
| } |
| |
| // Initialize fence for the in flight resource pool slot |
| inputResource.m_fence.reset(d3d12_create_fence_raw(pD3D12Dec->m_spFence.Get(), CompletionFenceValue++)); |
| } |
| |
| ComPtr<ID3D12Device4> spD3D12Device4; |
| if (FAILED(pD3D12Dec->m_pD3D12Screen->dev->QueryInterface(IID_PPV_ARGS(spD3D12Device4.GetAddressOf())))) { |
|       debug_printf( |
|          "[d3d12_video_decoder] d3d12_video_decoder_create_command_objects - D3D12 Device has no ID3D12Device4 support\n"); |
| return false; |
| } |
| |
| hr = spD3D12Device4->CreateCommandList1(0, |
| D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE, |
| D3D12_COMMAND_LIST_FLAG_NONE, |
| IID_PPV_ARGS(pD3D12Dec->m_spDecodeCommandList.GetAddressOf())); |
| |
| if (FAILED(hr)) { |
| debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_command_objects - Call to CreateCommandList " |
| "failed with HR %x\n", |
| hr); |
| return false; |
| } |
| |
| return true; |
| } |
| |
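| /** |
|  * Checks D3D12_FEATURE_VIDEO_DECODE_SUPPORT for the configured profile/resolution/format, caches the |
|  * decoder-specific configuration flags and creates the ID3D12VideoDecoder instance |
|  */ |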
| bool |
| d3d12_video_decoder_check_caps_and_create_decoder(const struct d3d12_screen *pD3D12Screen, |
| struct d3d12_video_decoder *pD3D12Dec) |
| { |
| assert(pD3D12Dec->m_spD3D12VideoDevice); |
| |
| pD3D12Dec->m_decoderDesc = {}; |
| |
| D3D12_VIDEO_DECODE_CONFIGURATION decodeConfiguration = { pD3D12Dec->m_d3d12DecProfile, |
| D3D12_BITSTREAM_ENCRYPTION_TYPE_NONE, |
| D3D12_VIDEO_FRAME_CODED_INTERLACE_TYPE_NONE }; |
| |
| D3D12_FEATURE_DATA_VIDEO_DECODE_SUPPORT decodeSupport = {}; |
| decodeSupport.NodeIndex = pD3D12Dec->m_NodeIndex; |
| decodeSupport.Configuration = decodeConfiguration; |
| decodeSupport.Width = pD3D12Dec->base.width; |
| decodeSupport.Height = pD3D12Dec->base.height; |
| decodeSupport.DecodeFormat = pD3D12Dec->m_decodeFormat; |
| // no info from above layer on framerate/bitrate |
| decodeSupport.FrameRate.Numerator = 0; |
| decodeSupport.FrameRate.Denominator = 0; |
| decodeSupport.BitRate = 0; |
| |
| HRESULT hr = pD3D12Dec->m_spD3D12VideoDevice->CheckFeatureSupport(D3D12_FEATURE_VIDEO_DECODE_SUPPORT, |
| &decodeSupport, |
| sizeof(decodeSupport)); |
| if (FAILED(hr)) { |
| debug_printf("[d3d12_video_decoder] d3d12_video_decoder_check_caps_and_create_decoder - CheckFeatureSupport " |
| "failed with HR %x\n", |
| hr); |
| return false; |
| } |
| |
| if (!(decodeSupport.SupportFlags & D3D12_VIDEO_DECODE_SUPPORT_FLAG_SUPPORTED)) { |
| debug_printf("[d3d12_video_decoder] d3d12_video_decoder_check_caps_and_create_decoder - " |
| "D3D12_VIDEO_DECODE_SUPPORT_FLAG_SUPPORTED was false when checking caps \n"); |
| return false; |
| } |
| |
| pD3D12Dec->m_configurationFlags = decodeSupport.ConfigurationFlags; |
| pD3D12Dec->m_tier = decodeSupport.DecodeTier; |
| |
| if (d3d12_video_decoder_supports_aot_dpb(decodeSupport, pD3D12Dec->m_d3d12DecProfileType)) { |
| pD3D12Dec->m_ConfigDecoderSpecificFlags |= d3d12_video_decode_config_specific_flag_array_of_textures; |
| } |
| |
| if (decodeSupport.ConfigurationFlags & D3D12_VIDEO_DECODE_CONFIGURATION_FLAG_HEIGHT_ALIGNMENT_MULTIPLE_32_REQUIRED) { |
| pD3D12Dec->m_ConfigDecoderSpecificFlags |= d3d12_video_decode_config_specific_flag_alignment_height; |
| } |
| |
| if (decodeSupport.ConfigurationFlags & D3D12_VIDEO_DECODE_CONFIGURATION_FLAG_REFERENCE_ONLY_ALLOCATIONS_REQUIRED) { |
| pD3D12Dec->m_ConfigDecoderSpecificFlags |= |
| d3d12_video_decode_config_specific_flag_reference_only_textures_required; |
| } |
| |
| pD3D12Dec->m_decoderDesc.NodeMask = pD3D12Dec->m_NodeMask; |
| pD3D12Dec->m_decoderDesc.Configuration = decodeConfiguration; |
| |
| hr = pD3D12Dec->m_spD3D12VideoDevice->CreateVideoDecoder(&pD3D12Dec->m_decoderDesc, |
| IID_PPV_ARGS(pD3D12Dec->m_spVideoDecoder.GetAddressOf())); |
| if (FAILED(hr)) { |
| debug_printf("[d3d12_video_decoder] d3d12_video_decoder_check_caps_and_create_decoder - CreateVideoDecoder " |
| "failed with HR %x\n", |
| hr); |
| return false; |
| } |
| |
| return true; |
| } |
| |
| bool |
| d3d12_video_decoder_create_video_state_buffers(const struct d3d12_screen *pD3D12Screen, |
| struct d3d12_video_decoder *pD3D12Dec) |
| { |
| assert(pD3D12Dec->m_spD3D12VideoDevice); |
| if (!d3d12_video_decoder_create_staging_bitstream_buffer(pD3D12Screen, |
| pD3D12Dec, |
| pD3D12Dec->m_InitialCompBitstreamGPUBufferSize)) { |
| debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_video_state_buffers - Failure on " |
| "d3d12_video_decoder_create_staging_bitstream_buffer\n"); |
| return false; |
| } |
| |
| return true; |
| } |
| |
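| /** |
|  * (Re)creates the committed GPU buffer used to stage the compressed bitstream for the current in-flight |
|  * resource pool slot |
|  */ |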
| bool |
| d3d12_video_decoder_create_staging_bitstream_buffer(const struct d3d12_screen *pD3D12Screen, |
| struct d3d12_video_decoder *pD3D12Dec, |
| uint64_t bufSize) |
| { |
| assert(pD3D12Dec->m_spD3D12VideoDevice); |
| auto &inFlightResources = pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)]; |
| if (inFlightResources.m_curFrameCompressedBitstreamBuffer.Get() != nullptr) { |
| inFlightResources.m_curFrameCompressedBitstreamBuffer.Reset(); |
| } |
| |
| auto descHeap = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT, pD3D12Dec->m_NodeMask, pD3D12Dec->m_NodeMask); |
| auto descResource = CD3DX12_RESOURCE_DESC::Buffer(bufSize); |
| HRESULT hr = pD3D12Screen->dev->CreateCommittedResource( |
| &descHeap, |
| D3D12_HEAP_FLAG_NONE, |
| &descResource, |
| D3D12_RESOURCE_STATE_COMMON, |
| nullptr, |
| IID_PPV_ARGS(inFlightResources.m_curFrameCompressedBitstreamBuffer.GetAddressOf())); |
| if (FAILED(hr)) { |
| debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_staging_bitstream_buffer - " |
| "CreateCommittedResource failed with HR %x\n", |
| hr); |
| return false; |
| } |
| |
| inFlightResources.m_curFrameCompressedBitstreamBufferAllocatedSize = bufSize; |
| return true; |
| } |
| |
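| /** |
|  * Reconfigures the DPB if needed, resolves the decode output (and reference-only, when required) textures |
|  * and subresource indices for the current frame, records the associated state transitions and updates the |
|  * codec-specific reference structures |
|  */ |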
| bool |
| d3d12_video_decoder_prepare_for_decode_frame(struct d3d12_video_decoder *pD3D12Dec, |
| struct pipe_video_buffer *pCurrentDecodeTarget, |
| struct d3d12_video_buffer *pD3D12VideoBuffer, |
| ID3D12Resource **ppOutTexture2D, |
| uint32_t *pOutSubresourceIndex, |
| ID3D12Resource **ppRefOnlyOutTexture2D, |
| uint32_t *pRefOnlyOutSubresourceIndex, |
| const d3d12_video_decode_output_conversion_arguments &conversionArgs) |
| { |
| if (!d3d12_video_decoder_reconfigure_dpb(pD3D12Dec, pD3D12VideoBuffer, conversionArgs)) { |
| debug_printf("d3d12_video_decoder_reconfigure_dpb failed!\n"); |
| return false; |
| } |
| |
| // Refresh DPB active references for current frame, release memory for unused references. |
| d3d12_video_decoder_refresh_dpb_active_references(pD3D12Dec); |
| |
| // Get the output texture for the current frame to be decoded |
| pD3D12Dec->m_spDPBManager->get_current_frame_decode_output_texture(pCurrentDecodeTarget, |
| ppOutTexture2D, |
| pOutSubresourceIndex); |
| |
| auto vidBuffer = (struct d3d12_video_buffer *) (pCurrentDecodeTarget); |
|    // If is_pipe_buffer_underlying_output_decode_allocation is enabled, |
|    // we can just use the underlying allocation in pCurrentDecodeTarget |
|    // and avoid an extra copy after decoding the frame. |
|    // In that case, we need to handle the residency of this resource |
|    // (otherwise, the resources are created with CreateCommittedResource, |
|    // which makes them resident by default) |
| if (pD3D12Dec->m_spDPBManager->is_pipe_buffer_underlying_output_decode_allocation()) { |
| assert(d3d12_resource_resource(vidBuffer->texture) == *ppOutTexture2D); |
| // Make it permanently resident for video use |
| d3d12_promote_to_permanent_residency(pD3D12Dec->m_pD3D12Screen, vidBuffer->texture); |
| } |
| |
| // Get the reference only texture for the current frame to be decoded (if applicable) |
| bool fReferenceOnly = (pD3D12Dec->m_ConfigDecoderSpecificFlags & |
| d3d12_video_decode_config_specific_flag_reference_only_textures_required) != 0; |
| if (fReferenceOnly) { |
| bool needsTransitionToDecodeWrite = false; |
| pD3D12Dec->m_spDPBManager->get_reference_only_output(pCurrentDecodeTarget, |
| ppRefOnlyOutTexture2D, |
| pRefOnlyOutSubresourceIndex, |
| needsTransitionToDecodeWrite); |
| assert(needsTransitionToDecodeWrite); |
| |
| CD3DX12_RESOURCE_DESC outputDesc(GetDesc(*ppRefOnlyOutTexture2D)); |
| uint32_t MipLevel, PlaneSlice, ArraySlice; |
| D3D12DecomposeSubresource(*pRefOnlyOutSubresourceIndex, |
| outputDesc.MipLevels, |
| outputDesc.ArraySize(), |
| MipLevel, |
| ArraySlice, |
| PlaneSlice); |
| |
| for (PlaneSlice = 0; PlaneSlice < pD3D12Dec->m_decodeFormatInfo.PlaneCount; PlaneSlice++) { |
| uint planeOutputSubresource = outputDesc.CalcSubresource(MipLevel, ArraySlice, PlaneSlice); |
| |
| D3D12_RESOURCE_BARRIER resourceBarrierCommonToDecode[1] = { |
| CD3DX12_RESOURCE_BARRIER::Transition(*ppRefOnlyOutTexture2D, |
| D3D12_RESOURCE_STATE_COMMON, |
| D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE, |
| planeOutputSubresource), |
| }; |
| pD3D12Dec->m_spDecodeCommandList->ResourceBarrier(1u, resourceBarrierCommonToDecode); |
| } |
| |
| // Schedule reverse (back to common) transitions before command list closes for current frame |
| for (PlaneSlice = 0; PlaneSlice < pD3D12Dec->m_decodeFormatInfo.PlaneCount; PlaneSlice++) { |
| uint planeOutputSubresource = outputDesc.CalcSubresource(MipLevel, ArraySlice, PlaneSlice); |
| pD3D12Dec->m_transitionsBeforeCloseCmdList.push_back( |
| CD3DX12_RESOURCE_BARRIER::Transition(*ppRefOnlyOutTexture2D, |
| D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE, |
| D3D12_RESOURCE_STATE_COMMON, |
| planeOutputSubresource)); |
| } |
| } |
| |
|    // If the decoder needs reference_only entries in the DPB, use the reference_only allocation for the current |
|    // frame; otherwise, use the standard output resource |
| [[maybe_unused]] ID3D12Resource *pCurrentFrameDPBEntry = fReferenceOnly ? *ppRefOnlyOutTexture2D : *ppOutTexture2D; |
| [[maybe_unused]] uint32_t currentFrameDPBEntrySubresource = fReferenceOnly ? *pRefOnlyOutSubresourceIndex : *pOutSubresourceIndex; |
| #if D3D12_VIDEO_ANY_DECODER_ENABLED |
| switch (pD3D12Dec->m_d3d12DecProfileType) { |
| #if VIDEO_CODEC_H264DEC |
| case d3d12_video_decode_profile_type_h264: |
| { |
| d3d12_video_decoder_prepare_current_frame_references_h264(pD3D12Dec, |
| pCurrentFrameDPBEntry, |
| currentFrameDPBEntrySubresource); |
| } break; |
| #endif |
| #if VIDEO_CODEC_H265DEC |
| case d3d12_video_decode_profile_type_hevc: |
| { |
| d3d12_video_decoder_prepare_current_frame_references_hevc(pD3D12Dec, |
| pCurrentFrameDPBEntry, |
| currentFrameDPBEntrySubresource); |
| } break; |
| #endif |
| #if VIDEO_CODEC_AV1DEC |
| case d3d12_video_decode_profile_type_av1: |
| { |
| d3d12_video_decoder_prepare_current_frame_references_av1(pD3D12Dec, |
| pCurrentFrameDPBEntry, |
| currentFrameDPBEntrySubresource); |
| } break; |
| #endif |
| #if VIDEO_CODEC_VP9DEC |
| case d3d12_video_decode_profile_type_vp9: |
| { |
| d3d12_video_decoder_prepare_current_frame_references_vp9(pD3D12Dec, |
| pCurrentFrameDPBEntry, |
| currentFrameDPBEntrySubresource); |
| } break; |
| #endif |
| default: |
| { |
| unreachable("Unsupported d3d12_video_decode_profile_type"); |
| } break; |
| } |
| #endif // D3D12_VIDEO_ANY_DECODER_ENABLED |
| return true; |
| } |
| |
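| /** |
|  * Re-creates the video decoder, decoder heap and/or DPB manager when the decode format, dimensions, |
|  * interlace type or DPB size changed since the last frame |
|  */ |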
| bool |
| d3d12_video_decoder_reconfigure_dpb(struct d3d12_video_decoder *pD3D12Dec, |
| struct d3d12_video_buffer *pD3D12VideoBuffer, |
| const d3d12_video_decode_output_conversion_arguments &conversionArguments) |
| { |
| uint32_t width; |
| uint32_t height; |
| uint16_t maxDPB; |
| d3d12_video_decoder_get_frame_info(pD3D12Dec, &width, &height, &maxDPB); |
| |
| ID3D12Resource *pPipeD3D12DstResource = d3d12_resource_resource(pD3D12VideoBuffer->texture); |
| D3D12_RESOURCE_DESC outputResourceDesc = GetDesc(pPipeD3D12DstResource); |
| |
| D3D12_VIDEO_FRAME_CODED_INTERLACE_TYPE interlaceTypeRequested = |
| pD3D12VideoBuffer->base.interlaced ? D3D12_VIDEO_FRAME_CODED_INTERLACE_TYPE_FIELD_BASED : D3D12_VIDEO_FRAME_CODED_INTERLACE_TYPE_NONE; |
| if ((pD3D12Dec->m_decodeFormat != outputResourceDesc.Format) || |
| (pD3D12Dec->m_decoderDesc.Configuration.InterlaceType != interlaceTypeRequested)) { |
|       // Copy the current pD3D12Dec->m_decoderDesc, modify the decode profile / interlace type and re-create the decoder. |
| D3D12_VIDEO_DECODER_DESC decoderDesc = pD3D12Dec->m_decoderDesc; |
| decoderDesc.Configuration.InterlaceType = interlaceTypeRequested; |
| decoderDesc.Configuration.DecodeProfile = |
| d3d12_video_decoder_resolve_profile(pD3D12Dec->m_d3d12DecProfileType, pD3D12Dec->m_decodeFormat); |
| pD3D12Dec->m_spVideoDecoder.Reset(); |
| HRESULT hr = |
| pD3D12Dec->m_spD3D12VideoDevice->CreateVideoDecoder(&decoderDesc, |
| IID_PPV_ARGS(pD3D12Dec->m_spVideoDecoder.GetAddressOf())); |
| if (FAILED(hr)) { |
| debug_printf( |
| "[d3d12_video_decoder] d3d12_video_decoder_reconfigure_dpb - CreateVideoDecoder failed with HR %x\n", |
| hr); |
| return false; |
| } |
| // Update state after CreateVideoDecoder succeeds only. |
| pD3D12Dec->m_decoderDesc = decoderDesc; |
| } |
| |
| if (!pD3D12Dec->m_spDPBManager || !pD3D12Dec->m_spVideoDecoderHeap || |
| pD3D12Dec->m_decodeFormat != outputResourceDesc.Format || pD3D12Dec->m_decoderHeapDesc.DecodeWidth != width || |
| pD3D12Dec->m_decoderHeapDesc.DecodeHeight != height || |
| pD3D12Dec->m_decoderHeapDesc.MaxDecodePictureBufferCount < maxDPB) { |
| // Detect the combination of AOT/ReferenceOnly to configure the DPB manager |
| uint16_t referenceCount = (conversionArguments.Enable) ? (uint16_t) conversionArguments.ReferenceFrameCount + |
| 1 /*extra slot for current picture*/ : |
| maxDPB; |
| d3d12_video_decode_dpb_descriptor dpbDesc = {}; |
| dpbDesc.Width = (conversionArguments.Enable) ? conversionArguments.ReferenceInfo.Width : width; |
| dpbDesc.Height = (conversionArguments.Enable) ? conversionArguments.ReferenceInfo.Height : height; |
| dpbDesc.Format = |
| (conversionArguments.Enable) ? conversionArguments.ReferenceInfo.Format.Format : outputResourceDesc.Format; |
| dpbDesc.fArrayOfTexture = |
| ((pD3D12Dec->m_ConfigDecoderSpecificFlags & d3d12_video_decode_config_specific_flag_array_of_textures) != 0); |
| dpbDesc.dpbSize = referenceCount; |
| dpbDesc.m_NodeMask = pD3D12Dec->m_NodeMask; |
| dpbDesc.fReferenceOnly = ((pD3D12Dec->m_ConfigDecoderSpecificFlags & |
| d3d12_video_decode_config_specific_flag_reference_only_textures_required) != 0); |
| |
| // Create DPB manager |
| if (pD3D12Dec->m_spDPBManager == nullptr) { |
| pD3D12Dec->m_spDPBManager.reset(new d3d12_video_decoder_references_manager(pD3D12Dec->m_pD3D12Screen, |
| pD3D12Dec->m_NodeMask, |
| pD3D12Dec->m_d3d12DecProfileType, |
| dpbDesc)); |
| } |
| |
| // |
| // (Re)-create decoder heap |
| // |
| D3D12_VIDEO_DECODER_HEAP_DESC decoderHeapDesc = {}; |
| decoderHeapDesc.NodeMask = pD3D12Dec->m_NodeMask; |
| decoderHeapDesc.Configuration = pD3D12Dec->m_decoderDesc.Configuration; |
| decoderHeapDesc.DecodeWidth = static_cast<UINT>(dpbDesc.Width); |
| decoderHeapDesc.DecodeHeight = dpbDesc.Height; |
| decoderHeapDesc.Format = dpbDesc.Format; |
| decoderHeapDesc.MaxDecodePictureBufferCount = maxDPB; |
| pD3D12Dec->m_spVideoDecoderHeap.Reset(); |
| HRESULT hr = pD3D12Dec->m_spD3D12VideoDevice->CreateVideoDecoderHeap( |
| &decoderHeapDesc, |
| IID_PPV_ARGS(pD3D12Dec->m_spVideoDecoderHeap.GetAddressOf())); |
| if (FAILED(hr)) { |
| debug_printf( |
| "[d3d12_video_decoder] d3d12_video_decoder_reconfigure_dpb - CreateVideoDecoderHeap failed with HR %x\n", |
| hr); |
| return false; |
| } |
| // Update pD3D12Dec after CreateVideoDecoderHeap succeeds only. |
| pD3D12Dec->m_decoderHeapDesc = decoderHeapDesc; |
| } |
| |
| pD3D12Dec->m_decodeFormat = outputResourceDesc.Format; |
| |
| return true; |
| } |
| |
| void |
| d3d12_video_decoder_refresh_dpb_active_references(struct d3d12_video_decoder *pD3D12Dec) |
| { |
| #if D3D12_VIDEO_ANY_DECODER_ENABLED |
| switch (pD3D12Dec->m_d3d12DecProfileType) { |
| #if VIDEO_CODEC_H264DEC |
| case d3d12_video_decode_profile_type_h264: |
| { |
| d3d12_video_decoder_refresh_dpb_active_references_h264(pD3D12Dec); |
| } break; |
| #endif |
| #if VIDEO_CODEC_H265DEC |
| case d3d12_video_decode_profile_type_hevc: |
| { |
| d3d12_video_decoder_refresh_dpb_active_references_hevc(pD3D12Dec); |
| } break; |
| #endif |
| #if VIDEO_CODEC_AV1DEC |
| case d3d12_video_decode_profile_type_av1: |
| { |
| d3d12_video_decoder_refresh_dpb_active_references_av1(pD3D12Dec); |
| } break; |
| #endif |
| #if VIDEO_CODEC_VP9DEC |
| case d3d12_video_decode_profile_type_vp9: |
| { |
| d3d12_video_decoder_refresh_dpb_active_references_vp9(pD3D12Dec); |
| } break; |
| #endif |
| default: |
| { |
| unreachable("Unsupported d3d12_video_decode_profile_type"); |
| } break; |
| } |
| #endif // D3D12_VIDEO_ANY_DECODER_ENABLED |
| } |
| |
| void |
| d3d12_video_decoder_get_frame_info( |
| struct d3d12_video_decoder *pD3D12Dec, uint32_t *pWidth, uint32_t *pHeight, uint16_t *pMaxDPB) |
| { |
| *pWidth = 0; |
| *pHeight = 0; |
| *pMaxDPB = 0; |
| |
| #if D3D12_VIDEO_ANY_DECODER_ENABLED |
| switch (pD3D12Dec->m_d3d12DecProfileType) { |
| #if VIDEO_CODEC_H264DEC |
| case d3d12_video_decode_profile_type_h264: |
| { |
| d3d12_video_decoder_get_frame_info_h264(pD3D12Dec, pWidth, pHeight, pMaxDPB); |
| } break; |
| #endif |
| #if VIDEO_CODEC_H265DEC |
| case d3d12_video_decode_profile_type_hevc: |
| { |
| d3d12_video_decoder_get_frame_info_hevc(pD3D12Dec, pWidth, pHeight, pMaxDPB); |
| } break; |
| #endif |
| #if VIDEO_CODEC_AV1DEC |
| case d3d12_video_decode_profile_type_av1: |
| { |
| d3d12_video_decoder_get_frame_info_av1(pD3D12Dec, pWidth, pHeight, pMaxDPB); |
| } break; |
| #endif |
| #if VIDEO_CODEC_VP9DEC |
| case d3d12_video_decode_profile_type_vp9: |
| { |
| d3d12_video_decoder_get_frame_info_vp9(pD3D12Dec, pWidth, pHeight, pMaxDPB); |
| } break; |
| #endif |
| default: |
| { |
| unreachable("Unsupported d3d12_video_decode_profile_type"); |
| } break; |
| } |
| #endif // D3D12_VIDEO_ANY_DECODER_ENABLED |
| |
| if (pD3D12Dec->m_ConfigDecoderSpecificFlags & d3d12_video_decode_config_specific_flag_alignment_height) { |
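|       // Round the height up to the next multiple of 32, as required when |
|       // D3D12_VIDEO_DECODE_CONFIGURATION_FLAG_HEIGHT_ALIGNMENT_MULTIPLE_32_REQUIRED is reported |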
| const uint32_t AlignmentMask = 31; |
| *pHeight = (*pHeight + AlignmentMask) & ~AlignmentMask; |
| } |
| } |
| |
| void |
| d3d12_video_decoder_store_converted_dxva_picparams_from_pipe_input( |
| struct d3d12_video_decoder *codec, // input argument, current decoder |
| struct pipe_picture_desc |
| *picture, // input argument, base structure of pipe_XXX_picture_desc where XXX is the codec name |
| struct d3d12_video_buffer *pD3D12VideoBuffer // input argument, target video buffer |
| ) |
| { |
| #if D3D12_VIDEO_ANY_DECODER_ENABLED |
| assert(picture); |
| assert(codec); |
| struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec; |
| |
| d3d12_video_decode_profile_type profileType = |
| d3d12_video_decoder_convert_pipe_video_profile_to_profile_type(codec->base.profile); |
| ID3D12Resource *pPipeD3D12DstResource = d3d12_resource_resource(pD3D12VideoBuffer->texture); |
| D3D12_RESOURCE_DESC outputResourceDesc = GetDesc(pPipeD3D12DstResource); |
| auto &inFlightResources = pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)]; |
| inFlightResources.qp_matrix_frame_argument_enabled = false; |
| switch (profileType) { |
| #if VIDEO_CODEC_H264DEC |
| case d3d12_video_decode_profile_type_h264: |
| { |
| size_t dxvaPicParamsBufferSize = sizeof(DXVA_PicParams_H264); |
| pipe_h264_picture_desc *pPicControlH264 = (pipe_h264_picture_desc *) picture; |
| DXVA_PicParams_H264 dxvaPicParamsH264 = |
| d3d12_video_decoder_dxva_picparams_from_pipe_picparams_h264(pD3D12Dec->m_fenceValue, |
| codec->base.profile, |
| static_cast<UINT>(outputResourceDesc.Width), |
| outputResourceDesc.Height, |
| pPicControlH264); |
| |
| d3d12_video_decoder_store_dxva_picparams_in_picparams_buffer(codec, |
| &dxvaPicParamsH264, |
| dxvaPicParamsBufferSize); |
| |
| size_t dxvaQMatrixBufferSize = sizeof(DXVA_Qmatrix_H264); |
| DXVA_Qmatrix_H264 dxvaQmatrixH264 = {}; |
| d3d12_video_decoder_dxva_qmatrix_from_pipe_picparams_h264((pipe_h264_picture_desc *) picture, dxvaQmatrixH264); |
| // The pipe picture params don't indicate whether a quantization matrix was |
| // provided, so always send one for H264. |
| inFlightResources.qp_matrix_frame_argument_enabled = true; |
| d3d12_video_decoder_store_dxva_qmatrix_in_qmatrix_buffer(codec, &dxvaQmatrixH264, dxvaQMatrixBufferSize); |
| } break; |
| #endif |
| #if VIDEO_CODEC_H265DEC |
| case d3d12_video_decode_profile_type_hevc: |
| { |
| size_t dxvaPicParamsBufferSize = sizeof(DXVA_PicParams_HEVC); |
| pipe_h265_picture_desc *pPicControlHEVC = (pipe_h265_picture_desc *) picture; |
| DXVA_PicParams_HEVC dxvaPicParamsHEVC = |
| d3d12_video_decoder_dxva_picparams_from_pipe_picparams_hevc(pD3D12Dec, codec->base.profile, pPicControlHEVC); |
| |
| d3d12_video_decoder_store_dxva_picparams_in_picparams_buffer(codec, |
| &dxvaPicParamsHEVC, |
| dxvaPicParamsBufferSize); |
| |
| size_t dxvaQMatrixBufferSize = sizeof(DXVA_Qmatrix_HEVC); |
| DXVA_Qmatrix_HEVC dxvaQmatrixHEVC = {}; |
| inFlightResources.qp_matrix_frame_argument_enabled = false; |
| d3d12_video_decoder_dxva_qmatrix_from_pipe_picparams_hevc((pipe_h265_picture_desc *) picture, |
| dxvaQmatrixHEVC, |
| inFlightResources.qp_matrix_frame_argument_enabled); |
| d3d12_video_decoder_store_dxva_qmatrix_in_qmatrix_buffer(codec, &dxvaQmatrixHEVC, dxvaQMatrixBufferSize); |
| } break; |
| #endif |
| #if VIDEO_CODEC_AV1DEC |
| case d3d12_video_decode_profile_type_av1: |
| { |
| size_t dxvaPicParamsBufferSize = sizeof(DXVA_PicParams_AV1); |
| pipe_av1_picture_desc *pPicControlAV1 = (pipe_av1_picture_desc *) picture; |
| DXVA_PicParams_AV1 dxvaPicParamsAV1 = |
| d3d12_video_decoder_dxva_picparams_from_pipe_picparams_av1(pD3D12Dec->m_fenceValue, |
| codec->base.profile, |
| pPicControlAV1); |
| |
| d3d12_video_decoder_store_dxva_picparams_in_picparams_buffer(codec, &dxvaPicParamsAV1, dxvaPicParamsBufferSize); |
| inFlightResources.qp_matrix_frame_argument_enabled = false; |
| } break; |
| #endif |
| #if VIDEO_CODEC_VP9DEC |
| case d3d12_video_decode_profile_type_vp9: |
| { |
| size_t dxvaPicParamsBufferSize = sizeof(DXVA_PicParams_VP9); |
| pipe_vp9_picture_desc *pPicControlVP9 = (pipe_vp9_picture_desc *) picture; |
| DXVA_PicParams_VP9 dxvaPicParamsVP9 = |
| d3d12_video_decoder_dxva_picparams_from_pipe_picparams_vp9(pD3D12Dec, codec->base.profile, pPicControlVP9); |
| |
| d3d12_video_decoder_store_dxva_picparams_in_picparams_buffer(codec, &dxvaPicParamsVP9, dxvaPicParamsBufferSize); |
| inFlightResources.qp_matrix_frame_argument_enabled = false; |
| } break; |
| #endif |
| default: |
| { |
| unreachable("Unsupported d3d12_video_decode_profile_type"); |
| } break; |
| } |
| #endif // D3D12_VIDEO_ANY_DECODER_ENABLED |
| } |
| |
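| // Builds the DXVA slice control buffer for the current frame by dispatching |
| // to the codec-specific routine. |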
| void |
| d3d12_video_decoder_prepare_dxva_slices_control( |
| struct d3d12_video_decoder *pD3D12Dec, // input argument, current decoder |
| struct pipe_picture_desc *picture) |
| { |
| #if D3D12_VIDEO_ANY_DECODER_ENABLED |
| [[maybe_unused]] auto &inFlightResources = pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)]; |
| d3d12_video_decode_profile_type profileType = |
| d3d12_video_decoder_convert_pipe_video_profile_to_profile_type(pD3D12Dec->base.profile); |
| switch (profileType) { |
| #if VIDEO_CODEC_H264DEC |
| case d3d12_video_decode_profile_type_h264: |
| { |
| d3d12_video_decoder_prepare_dxva_slices_control_h264(pD3D12Dec, |
| inFlightResources.m_SliceControlBuffer, |
| (struct pipe_h264_picture_desc *) picture); |
| } break; |
| #endif |
| #if VIDEO_CODEC_H265DEC |
| case d3d12_video_decode_profile_type_hevc: |
| { |
| d3d12_video_decoder_prepare_dxva_slices_control_hevc(pD3D12Dec, |
| inFlightResources.m_SliceControlBuffer, |
| (struct pipe_h265_picture_desc *) picture); |
| } break; |
| #endif |
| #if VIDEO_CODEC_AV1DEC |
| case d3d12_video_decode_profile_type_av1: |
| { |
| d3d12_video_decoder_prepare_dxva_slices_control_av1(pD3D12Dec, |
| inFlightResources.m_SliceControlBuffer, |
| (struct pipe_av1_picture_desc *) picture); |
| } break; |
| #endif |
| #if VIDEO_CODEC_VP9DEC |
| case d3d12_video_decode_profile_type_vp9: |
| { |
| d3d12_video_decoder_prepare_dxva_slices_control_vp9(pD3D12Dec, |
| inFlightResources.m_SliceControlBuffer, |
| (struct pipe_vp9_picture_desc *) picture); |
| } break; |
| #endif |
| default: |
| { |
| unreachable("Unsupported d3d12_video_decode_profile_type"); |
| } break; |
| } |
| #endif // D3D12_VIDEO_ANY_DECODER_ENABLED |
| } |
| |
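| // Copies a codec-specific DXVA inverse quantization matrix structure into |
| // the staging buffer of the current in-flight resource pool entry. |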
| void |
| d3d12_video_decoder_store_dxva_qmatrix_in_qmatrix_buffer(struct d3d12_video_decoder *pD3D12Dec, |
| void *pDXVAStruct, |
| size_t DXVAStructSize) |
| { |
| auto &inFlightResources = pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)]; |
| // resize() grows the allocation as needed; no separate reserve() is required. |
| inFlightResources.m_InverseQuantMatrixBuffer.resize(DXVAStructSize); |
| memcpy(inFlightResources.m_InverseQuantMatrixBuffer.data(), pDXVAStruct, DXVAStructSize); |
| } |
| |
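| // Copies a codec-specific DXVA picture parameters structure into the staging |
| // buffer of the current in-flight resource pool entry. |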
| void |
| d3d12_video_decoder_store_dxva_picparams_in_picparams_buffer(struct d3d12_video_decoder *pD3D12Dec, |
| void *pDXVAStruct, |
| size_t DXVAStructSize) |
| { |
| auto &inFlightResources = pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)]; |
| // resize() grows the allocation as needed; no separate reserve() is required. |
| inFlightResources.m_picParamsBuffer.resize(DXVAStructSize); |
| memcpy(inFlightResources.m_picParamsBuffer.data(), pDXVAStruct, DXVAStructSize); |
| } |
| |
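| // Reports whether the decoder can use an array-of-textures DPB (a separate |
| // ID3D12Resource per reference picture). This requires a codec family that |
| // is compiled into the driver and D3D12_VIDEO_DECODE_TIER_2 or higher, which |
| // lifts the single-texture-array DPB restriction of tier 1. |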
| bool |
| d3d12_video_decoder_supports_aot_dpb(D3D12_FEATURE_DATA_VIDEO_DECODE_SUPPORT decodeSupport, |
| d3d12_video_decode_profile_type profileType) |
| { |
| bool supportedProfile = false; |
| #if D3D12_VIDEO_ANY_DECODER_ENABLED |
| switch (profileType) { |
| #if VIDEO_CODEC_H264DEC |
| case d3d12_video_decode_profile_type_h264: |
| { |
| supportedProfile = true; |
| } break; |
| #endif |
| #if VIDEO_CODEC_H265DEC |
| case d3d12_video_decode_profile_type_hevc: |
| { |
| supportedProfile = true; |
| } break; |
| #endif |
| #if VIDEO_CODEC_AV1DEC |
| case d3d12_video_decode_profile_type_av1: |
| { |
| supportedProfile = true; |
| } break; |
| #endif |
| #if VIDEO_CODEC_VP9DEC |
| case d3d12_video_decode_profile_type_vp9: |
| { |
| supportedProfile = true; |
| } break; |
| #endif |
| default: |
| supportedProfile = false; |
| break; |
| } |
| #endif // D3D12_VIDEO_ANY_DECODER_ENABLED |
| |
| return (decodeSupport.DecodeTier >= D3D12_VIDEO_DECODE_TIER_2) && supportedProfile; |
| } |
| |
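| // Maps a pipe_video_profile onto the driver's internal codec family enum. |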
| d3d12_video_decode_profile_type |
| d3d12_video_decoder_convert_pipe_video_profile_to_profile_type(enum pipe_video_profile profile) |
| { |
| switch (profile) { |
| case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE: |
| case PIPE_VIDEO_PROFILE_MPEG4_AVC_CONSTRAINED_BASELINE: |
| case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN: |
| case PIPE_VIDEO_PROFILE_MPEG4_AVC_EXTENDED: |
| case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH: |
| case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH10: |
| return d3d12_video_decode_profile_type_h264; |
| case PIPE_VIDEO_PROFILE_HEVC_MAIN: |
| case PIPE_VIDEO_PROFILE_HEVC_MAIN_10: |
| return d3d12_video_decode_profile_type_hevc; |
| case PIPE_VIDEO_PROFILE_AV1_MAIN: |
| return d3d12_video_decode_profile_type_av1; |
| case PIPE_VIDEO_PROFILE_VP9_PROFILE0: |
| case PIPE_VIDEO_PROFILE_VP9_PROFILE2: |
| return d3d12_video_decode_profile_type_vp9; |
| default: |
| { |
| unreachable("Unsupported pipe video profile"); |
| } break; |
| } |
| } |
| |
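| // Maps a pipe_video_profile onto the matching D3D12 video decode profile |
| // GUID; unsupported profiles yield the zero GUID. |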
| GUID |
| d3d12_video_decoder_convert_pipe_video_profile_to_d3d12_profile(enum pipe_video_profile profile) |
| { |
| switch (profile) { |
| case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE: |
| case PIPE_VIDEO_PROFILE_MPEG4_AVC_CONSTRAINED_BASELINE: |
| case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN: |
| case PIPE_VIDEO_PROFILE_MPEG4_AVC_EXTENDED: |
| case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH: |
| case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH10: |
| return D3D12_VIDEO_DECODE_PROFILE_H264; |
| case PIPE_VIDEO_PROFILE_HEVC_MAIN: |
| return D3D12_VIDEO_DECODE_PROFILE_HEVC_MAIN; |
| case PIPE_VIDEO_PROFILE_HEVC_MAIN_10: |
| return D3D12_VIDEO_DECODE_PROFILE_HEVC_MAIN10; |
| case PIPE_VIDEO_PROFILE_AV1_MAIN: |
| return D3D12_VIDEO_DECODE_PROFILE_AV1_PROFILE0; |
| case PIPE_VIDEO_PROFILE_VP9_PROFILE0: |
| return D3D12_VIDEO_DECODE_PROFILE_VP9; |
| case PIPE_VIDEO_PROFILE_VP9_PROFILE2: |
| return D3D12_VIDEO_DECODE_PROFILE_VP9_10BIT_PROFILE2; |
| default: |
| return {}; |
| } |
| } |
| |
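| // Resolves the final decode profile GUID from the codec family plus the |
| // decode format: HEVC and VP9 use distinct GUIDs for 8-bit (NV12) and |
| // 10-bit (P010) content. |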
| GUID |
| d3d12_video_decoder_resolve_profile(d3d12_video_decode_profile_type profileType, DXGI_FORMAT decode_format) |
| { |
| #if D3D12_VIDEO_ANY_DECODER_ENABLED |
| switch (profileType) { |
| #if VIDEO_CODEC_H264DEC |
| case d3d12_video_decode_profile_type_h264: |
| return D3D12_VIDEO_DECODE_PROFILE_H264; |
| #endif |
| #if VIDEO_CODEC_H265DEC |
| case d3d12_video_decode_profile_type_hevc: |
| { |
| switch (decode_format) { |
| case DXGI_FORMAT_NV12: |
| return D3D12_VIDEO_DECODE_PROFILE_HEVC_MAIN; |
| case DXGI_FORMAT_P010: |
| return D3D12_VIDEO_DECODE_PROFILE_HEVC_MAIN10; |
| default: |
| { |
| unreachable("Unsupported decode_format"); |
| } break; |
| } |
| } break; |
| #endif |
| #if VIDEO_CODEC_AV1DEC |
| case d3d12_video_decode_profile_type_av1: |
| return D3D12_VIDEO_DECODE_PROFILE_AV1_PROFILE0; |
| #endif |
| #if VIDEO_CODEC_VP9DEC |
| case d3d12_video_decode_profile_type_vp9: |
| { |
| switch (decode_format) { |
| case DXGI_FORMAT_NV12: |
| return D3D12_VIDEO_DECODE_PROFILE_VP9; |
| case DXGI_FORMAT_P010: |
| return D3D12_VIDEO_DECODE_PROFILE_VP9_10BIT_PROFILE2; |
| default: |
| { |
| unreachable("Unsupported decode_format"); |
| } break; |
| } |
| } break; |
| #endif |
| default: |
| { |
| unreachable("Unsupported d3d12_video_decode_profile_type"); |
| } break; |
| } |
| #else |
| return {}; |
| #endif // D3D12_VIDEO_ANY_DECODER_ENABLED |
| } |
| |
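| // Waits up to timeout_ns for the GPU work of the in-flight pool entry at |
| // frame_index to finish, then releases the references taken at end_frame and |
| // resets the entry's command allocator. Returns false on timeout or if the |
| // device was removed. |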
| bool |
| d3d12_video_decoder_sync_completion(struct pipe_video_codec *codec, |
| uint32_t frame_index, |
| uint64_t timeout_ns) |
| { |
| struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec; |
| assert(pD3D12Dec); |
| assert(pD3D12Dec->m_spD3D12VideoDevice); |
| assert(pD3D12Dec->m_spDecodeCommandQueue); |
| HRESULT hr = S_OK; |
| |
| auto &pool_entry = pD3D12Dec->m_inflightResourcesPool[frame_index]; |
| if (!d3d12_fence_finish(pool_entry.m_fence.get(), timeout_ns)) |
| return false; |
| |
| // Release the references taken at end_frame for this in-flight operation |
| pool_entry.m_spDecoder.Reset(); |
| pool_entry.m_spDecoderHeap.Reset(); |
| pool_entry.m_References.reset(); |
| pool_entry.m_stagingDecodeBitstream.clear(); |
| pipe_resource_reference(&pool_entry.pPipeCompressedBufferObj, NULL); |
| |
| struct d3d12_screen *pD3D12Screen = (struct d3d12_screen *) pD3D12Dec->m_pD3D12Screen; |
| assert(pD3D12Screen); |
| |
| // Release the bitstream upload completion fence of this same pool entry |
| pD3D12Screen->base.fence_reference(&pD3D12Screen->base, |
| &pool_entry.m_pBitstreamUploadGPUCompletionFence, |
| NULL); |
| |
| hr = pool_entry.m_spCommandAllocator->Reset(); |
| if (FAILED(hr)) { |
| debug_printf("failed with %x.\n", hr); |
| goto sync_with_token_fail; |
| } |
| |
| // Validate device was not removed |
| hr = pD3D12Dec->m_pD3D12Screen->dev->GetDeviceRemovedReason(); |
| if (hr != S_OK) { |
| debug_printf("[d3d12_video_decoder] d3d12_video_decoder_sync_completion" |
| " - D3D12Device was removed AFTER d3d12_video_decoder_ensure_fence_finished " |
| "execution with HR %x, but wasn't before.\n", |
| hr); |
| goto sync_with_token_fail; |
| } |
| |
| debug_printf( |
| "[d3d12_video_decoder] d3d12_video_decoder_sync_completion - GPU execution finalized for frame index: %u\n", |
| frame_index); |
| |
| return true; |
| |
| sync_with_token_fail: |
| debug_printf("[d3d12_video_decoder] d3d12_video_decoder_sync_completion failed for frame index: %u\n", |
| frame_index); |
| assert(false); |
| return false; |
| } |