blob: 0706a1ad4e53b6ddf15ab45f62d2dd37837d7434 [file] [log] [blame]
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "ui/surface/accelerated_surface_transformer_win.h"
#include <vector>
#include "accelerated_surface_transformer_win_hlsl_compiled.h"
#include "base/debug/trace_event.h"
#include "base/memory/ref_counted.h"
#include "base/metrics/histogram.h"
#include "base/single_thread_task_runner.h"
#include "base/synchronization/lock.h"
#include "base/synchronization/waitable_event.h"
#include "base/win/scoped_comptr.h"
#include "ui/gfx/native_widget_types.h"
#include "ui/gfx/rect.h"
#include "ui/gfx/size.h"
#include "ui/surface/d3d9_utils_win.h"
#include "ui/surface/surface_export.h"
using base::win::ScopedComPtr;
using std::vector;
using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsConvertRGBtoY8UV44;
using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsConvertUV44toU2V2;
using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsOneTexture;
using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kVsFetch2Pixels;
using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kVsFetch4Pixels;
using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kVsOneTexture;
using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kVsFetch4PixelsScale2;
using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsConvertRGBtoY;
using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsConvertRGBtoU;
using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsConvertRGBtoV;
namespace d3d_utils = ui_surface_d3d9_utils;
namespace {
// One vertex as handed to the GPU: a four-component position followed by a
// two-component texture coordinate. The member order is significant because
// g_vertexElements refers to these members by byte offset.
struct Vertex {
  // Position.
  float x;
  float y;
  float z;
  float w;
  // Texture coordinate.
  float u;
  float v;
};
// Vertex declaration describing the layout of Vertex to Direct3D: a
// four-float position at byte offset 0, followed by a two-float texture
// coordinate at byte offset 16 (i.e. right after the four position floats).
// The list is terminated by D3DDECL_END().
const static D3DVERTEXELEMENT9 g_vertexElements[] = {
{ 0, 0, D3DDECLTYPE_FLOAT4, 0, D3DDECLUSAGE_POSITION, 0 },
{ 0, 16, D3DDECLTYPE_FLOAT2, 0, D3DDECLUSAGE_TEXCOORD, 0 },
D3DDECL_END()
};
class ScopedRenderTargetRestorer {
public:
ScopedRenderTargetRestorer(IDirect3DDevice9* device,
int render_target_id)
: device_(device),
target_id_(render_target_id) {
device_->GetRenderTarget(target_id_, original_render_target_.Receive());
}
~ScopedRenderTargetRestorer() {
device_->SetRenderTarget(target_id_, original_render_target_);
}
private:
ScopedComPtr<IDirect3DDevice9> device_;
int target_id_;
ScopedComPtr<IDirect3DSurface9> original_render_target_;
};
// Returns the number of copy passes needed to shrink |src_subrect| down to
// |dst_size| when every pass halves the width and the height at most once.
// The result is never smaller than one.
int GetResampleCount(const gfx::Rect& src_subrect,
                     const gfx::Size& dst_size) {
  // Count the halvings needed before the width fits inside the target.
  int width_halvings = 0;
  for (int w = src_subrect.width(); w > dst_size.width(); w >>= 1)
    ++width_halvings;

  // Same for the height.
  int height_halvings = 0;
  for (int h = src_subrect.height(); h > dst_size.height(); h >>= 1)
    ++height_halvings;

  // The back buffer itself is not lockable, so at least one copy is always
  // required.
  const int kMinResampleCount = 1;
  const int halvings = std::max(width_halvings, height_halvings);
  return std::max(halvings, kMinResampleCount);
}
// Halves both dimensions of |size| (integer division), but never lets a
// dimension drop below the corresponding dimension of |min_size|.
gfx::Size GetHalfSizeNoLessThan(const gfx::Size& size,
                                const gfx::Size& min_size) {
  const int half_width = size.width() / 2;
  const int half_height = size.height() / 2;
  const int clamped_width = std::max(min_size.width(), half_width);
  const int clamped_height = std::max(min_size.height(), half_height);
  return gfx::Size(clamped_width, clamped_height);
}
} // namespace
// Builds the table that maps every ShaderCombo to the vertex and pixel
// shader data it uses. Nothing is created on the device here; that happens
// in CompileShaderCombo().
AcceleratedSurfaceTransformer::AcceleratedSurfaceTransformer()
    : device_supports_multiple_render_targets_(false),
      vertex_shader_sources_(),
      pixel_shader_sources_() {
  // If a shader combo is added or removed, the tables below must be updated.
  COMPILE_ASSERT(NUM_SHADERS == 6, must_initialize_shader_sources);

  // Vertex programs, one entry per pass.
  vertex_shader_sources_[ONE_TEXTURE] = kVsOneTexture;
  vertex_shader_sources_[RGB_TO_YV12_FAST__PASS_1_OF_2] = kVsFetch4Pixels;
  vertex_shader_sources_[RGB_TO_YV12_FAST__PASS_2_OF_2] = kVsFetch2Pixels;
  vertex_shader_sources_[RGB_TO_YV12_SLOW__PASS_1_OF_3] = kVsFetch4Pixels;
  vertex_shader_sources_[RGB_TO_YV12_SLOW__PASS_2_OF_3] = kVsFetch4PixelsScale2;
  vertex_shader_sources_[RGB_TO_YV12_SLOW__PASS_3_OF_3] = kVsFetch4PixelsScale2;

  // Pixel programs, one entry per pass.
  pixel_shader_sources_[ONE_TEXTURE] = kPsOneTexture;
  pixel_shader_sources_[RGB_TO_YV12_FAST__PASS_1_OF_2] = kPsConvertRGBtoY8UV44;
  pixel_shader_sources_[RGB_TO_YV12_FAST__PASS_2_OF_2] = kPsConvertUV44toU2V2;
  pixel_shader_sources_[RGB_TO_YV12_SLOW__PASS_1_OF_3] = kPsConvertRGBtoY;
  pixel_shader_sources_[RGB_TO_YV12_SLOW__PASS_2_OF_3] = kPsConvertRGBtoU;
  pixel_shader_sources_[RGB_TO_YV12_SLOW__PASS_3_OF_3] = kPsConvertRGBtoV;
}
// Initializes the transformer for |device|. Returns true on success. On
// failure every resource acquired so far is released again and false is
// returned.
bool AcceleratedSurfaceTransformer::Init(IDirect3DDevice9* device) {
  if (DoInit(device))
    return true;

  // Initialization stopped part-way; drop whatever was set up.
  ReleaseAll();
  return false;
}
// Performs the actual initialization work for Init(): stores |device|,
// queries whether it supports multiple render targets, creates the shaders
// needed on this device, and installs the vertex declaration. Returns false
// as soon as any step fails; the caller is responsible for cleanup.
bool AcceleratedSurfaceTransformer::DoInit(IDirect3DDevice9* device) {
  device_ = device;

  {
    D3DCAPS9 caps;
    if (FAILED(device->GetDeviceCaps(&caps)))
      return false;

    device_supports_multiple_render_targets_ = (caps.NumSimultaneousRTs >= 2);

    // Log statistics about which paths we take.
    UMA_HISTOGRAM_BOOLEAN("GPU.AcceleratedSurfaceTransformerCanUseMRT",
                          device_supports_multiple_render_targets());
  }

  // Force compilation of all shaders that could be used on this GPU.
  if (!CompileShaderCombo(ONE_TEXTURE))
    return false;

  // Only the YV12 conversion path that matches the device's capabilities is
  // compiled up front.
  bool yv12_shaders_ready;
  if (device_supports_multiple_render_targets()) {
    yv12_shaders_ready = CompileShaderCombo(RGB_TO_YV12_FAST__PASS_1_OF_2) &&
                         CompileShaderCombo(RGB_TO_YV12_FAST__PASS_2_OF_2);
  } else {
    yv12_shaders_ready = CompileShaderCombo(RGB_TO_YV12_SLOW__PASS_1_OF_3) &&
                         CompileShaderCombo(RGB_TO_YV12_SLOW__PASS_2_OF_3) &&
                         CompileShaderCombo(RGB_TO_YV12_SLOW__PASS_3_OF_3);
  }
  if (!yv12_shaders_ready)
    return false;
  COMPILE_ASSERT(NUM_SHADERS == 6, must_compile_at_doinit);

  // Create the vertex declaration for Vertex and make it current on the
  // device.
  ScopedComPtr<IDirect3DVertexDeclaration9> vertex_declaration;
  HRESULT hr = device_->CreateVertexDeclaration(g_vertexElements,
                                                vertex_declaration.Receive());
  if (FAILED(hr))
    return false;

  hr = device_->SetVertexDeclaration(vertex_declaration);
  return !FAILED(hr);
}
// Makes sure the vertex and pixel shader objects for |shader| exist on the
// device, creating them if they have not been created yet. A newly created
// shader object is also shared with every other combo that uses the same
// shader data. Returns false if the device refuses to create a shader.
bool AcceleratedSurfaceTransformer::CompileShaderCombo(
    ShaderCombo shader) {
  if (!vertex_shaders_[shader]) {
    const DWORD* vertex_function =
        reinterpret_cast<const DWORD*>(vertex_shader_sources_[shader]);
    if (FAILED(device_->CreateVertexShader(
            vertex_function, vertex_shaders_[shader].Receive()))) {
      return false;
    }

    // Share the new object with the other combos built from the same data.
    for (int other = 0; other < NUM_SHADERS; ++other) {
      if (other == shader)
        continue;
      if (vertex_shader_sources_[other] == vertex_shader_sources_[shader])
        vertex_shaders_[other] = vertex_shaders_[shader];
    }
  }

  if (!pixel_shaders_[shader]) {
    const DWORD* pixel_function =
        reinterpret_cast<const DWORD*>(pixel_shader_sources_[shader]);
    if (FAILED(device_->CreatePixelShader(
            pixel_function, pixel_shaders_[shader].Receive()))) {
      return false;
    }

    // Share the new object with the other combos built from the same data.
    for (int other = 0; other < NUM_SHADERS; ++other) {
      if (other == shader)
        continue;
      if (pixel_shader_sources_[other] == pixel_shader_sources_[shader])
        pixel_shaders_[other] = pixel_shaders_[shader];
    }
  }

  return true;
}
// Resets every shader, scratch texture, scratch surface and finally the
// device pointer held by this object to NULL.
void AcceleratedSurfaceTransformer::ReleaseAll() {
  for (int shader = 0; shader < NUM_SHADERS; ++shader) {
    vertex_shaders_[shader] = NULL;
    pixel_shaders_[shader] = NULL;
  }

  user_scratch_texture_ = NULL;
  uv_scratch_texture_ = NULL;
  y_scratch_surface_ = NULL;
  u_scratch_surface_ = NULL;
  v_scratch_surface_ = NULL;

  const size_t scratch_count = arraysize(scaler_scratch_surfaces_);
  for (size_t slot = 0; slot < scratch_count; ++slot)
    scaler_scratch_surfaces_[slot] = NULL;

  // The device goes last, after everything created from it.
  device_ = NULL;
}
// Calls Detach() on every shader, scratch texture, scratch surface and
// finally the device pointer held by this object. The values returned by
// Detach() are discarded.
void AcceleratedSurfaceTransformer::DetachAll() {
  for (int shader = 0; shader < NUM_SHADERS; ++shader) {
    vertex_shaders_[shader].Detach();
    pixel_shaders_[shader].Detach();
  }

  user_scratch_texture_.Detach();
  uv_scratch_texture_.Detach();
  y_scratch_surface_.Detach();
  u_scratch_surface_.Detach();
  v_scratch_surface_.Detach();

  const size_t scratch_count = arraysize(scaler_scratch_surfaces_);
  for (size_t slot = 0; slot < scratch_count; ++slot)
    scaler_scratch_surfaces_[slot].Detach();

  // The device goes last, after everything created from it.
  device_.Detach();
}
// Copies |src_texture| into |dst_surface| using a vertical texture scale of
// -1 (presumably the vertex shader uses this to flip the image vertically;
// the shader itself is not part of this file). Returns the result of
// CopyWithTextureScale().
bool AcceleratedSurfaceTransformer::CopyInverted(
    IDirect3DTexture9* src_texture,
    IDirect3DSurface9* dst_surface,
    const gfx::Size& dst_size) {
  const float kScaleX = 1.0f;
  const float kScaleY = -1.0f;
  return CopyWithTextureScale(src_texture, dst_surface, dst_size,
                              kScaleX, kScaleY);
}
// Copies |src_texture| into |dst_surface| with a texture scale of one on
// both axes. Returns the result of CopyWithTextureScale().
bool AcceleratedSurfaceTransformer::Copy(
    IDirect3DTexture9* src_texture,
    IDirect3DSurface9* dst_surface,
    const gfx::Size& dst_size) {
  const float kScaleX = 1.0f;
  const float kScaleY = 1.0f;
  return CopyWithTextureScale(src_texture, dst_surface, dst_size,
                              kScaleX, kScaleY);
}
// Draws |src_texture| onto |dst_surface| with the ONE_TEXTURE shaders.
//
// |dst_size| is the size of the viewport drawn into. |texture_scale_x| and
// |texture_scale_y| are uploaded as the kTextureScale vertex shader constant.
// Point sampling is used when the source texture already has the destination
// size; otherwise bilinear filtering is used.
//
// The render target bound to slot 0 on entry is restored before returning.
// Returns false if the shaders cannot be selected or |dst_surface| cannot be
// bound as the render target; in both cases nothing is drawn.
bool AcceleratedSurfaceTransformer::CopyWithTextureScale(
    IDirect3DTexture9* src_texture,
    IDirect3DSurface9* dst_surface,
    const gfx::Size& dst_size,
    float texture_scale_x,
    float texture_scale_y) {
  if (!SetShaderCombo(ONE_TEXTURE))
    return false;

  // Set the kTextureScale vertex shader constant, which is assigned to
  // register 1.
  const float texture_scale[4] = {texture_scale_x, texture_scale_y, 0, 0};
  device()->SetVertexShaderConstantF(1, texture_scale, 1);

  ScopedRenderTargetRestorer render_target_restorer(device(), 0);

  // If the destination cannot be bound, the draw below would land on
  // whatever surface was bound before, so give up instead of drawing.
  if (FAILED(device()->SetRenderTarget(0, dst_surface)))
    return false;
  device()->SetTexture(0, src_texture);

  // Fill the viewport member by member: brace-initializing the structure
  // from the int dimensions would be a narrowing conversion.
  D3DVIEWPORT9 viewport;
  viewport.X = 0;
  viewport.Y = 0;
  viewport.Width = dst_size.width();
  viewport.Height = dst_size.height();
  viewport.MinZ = 0;
  viewport.MaxZ = 1;
  device()->SetViewport(&viewport);

  // No filtering is needed for a 1:1 copy.
  if (d3d_utils::GetSize(src_texture) == dst_size) {
    device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
    device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT);
  } else {
    device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR);
    device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR);
  }
  device()->SetSamplerState(0, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP);
  device()->SetSamplerState(0, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP);

  DrawScreenAlignedQuad(dst_size);

  // Clear surface references.
  device()->SetTexture(0, NULL);
  return true;
}
// Draws a quad covering the whole clip space with the currently selected
// shaders, texture and render target(s). |size| is uploaded to the vertex
// shader as the kRenderTargetSize constant.
void AcceleratedSurfaceTransformer::DrawScreenAlignedQuad(
    const gfx::Size& size) {
  // The dimensions are ints; convert explicitly, because an implicit
  // int-to-float conversion inside a brace initializer is a narrowing
  // conversion.
  const float target_size[4] = {
    static_cast<float>(size.width()),
    static_cast<float>(size.height()),
    0,
    0
  };

  // Set the uniform shader constant |kRenderTargetSize|, which is bound
  // to register c0.
  device()->SetVertexShaderConstantF(0, target_size, 1);

  // We always send down the same vertices. The vertex program will take
  // care of doing resolution-dependent position adjustment.
  const Vertex vertices[] = {
    { -1, +1, 0.5f, 1, 0, 0 },
    { +1, +1, 0.5f, 1, 1, 0 },
    { +1, -1, 0.5f, 1, 1, 1 },
    { -1, -1, 0.5f, 1, 0, 1 }
  };

  device()->BeginScene();
  // Four vertices drawn as a fan make up the two triangles of the quad.
  device()->DrawPrimitiveUP(D3DPT_TRIANGLEFAN,
                            2,
                            vertices,
                            sizeof(vertices[0]));
  device()->EndScene();
}
// Creates or reuses the user scratch render-target texture of |size|.
// On success, |*texture| receives the texture, |*texture_level_zero| is
// filled in by CreateOrReuseRenderTargetTexture(), and true is returned.
// On failure false is returned and |*texture| is left untouched.
bool AcceleratedSurfaceTransformer::GetIntermediateTexture(
    const gfx::Size& size,
    IDirect3DTexture9** texture,
    IDirect3DSurface9** texture_level_zero) {
  const bool have_texture =
      d3d_utils::CreateOrReuseRenderTargetTexture(device(),
                                                  size,
                                                  &user_scratch_texture_,
                                                  texture_level_zero);
  if (!have_texture)
    return false;

  // Copy the smart pointer and detach the copy, so the caller gets a raw
  // pointer while |user_scratch_texture_| keeps holding the texture
  // (assuming ScopedComPtr's copy takes its own reference).
  ScopedComPtr<IDirect3DTexture9> caller_reference(user_scratch_texture_);
  *texture = caller_reference.Detach();
  return true;
}
// Scales the |src_subrect| region of |src_surface| into the |dst_rect|
// region of |dst_surface| with a chain of linearly filtered StretchRect
// calls. Intermediate results ping-pong between the two scaler scratch
// surfaces. Returns false if either rectangle is empty, if a scratch surface
// cannot be obtained, or if a StretchRect call fails.
bool AcceleratedSurfaceTransformer::ResizeBilinear(
    IDirect3DSurface9* src_surface,
    const gfx::Rect& src_subrect,
    IDirect3DSurface9* dst_surface,
    const gfx::Rect& dst_rect) {
  COMPILE_ASSERT(arraysize(scaler_scratch_surfaces_) == 2, surface_count);

  const gfx::Size src_size = src_subrect.size();
  const gfx::Size dst_size = dst_rect.size();
  if (src_size.IsEmpty() || dst_size.IsEmpty())
    return false;

  // Set up intermediate buffers needed for downsampling. The first scratch
  // surface is needed from two passes on, the second from three passes on.
  const int resample_count = GetResampleCount(src_subrect, dst_size);
  const gfx::Size half_size =
      GetHalfSizeNoLessThan(src_subrect.size(), dst_size);
  if (resample_count > 1 &&
      !d3d_utils::CreateOrReuseLockableSurface(device(),
                                               half_size,
                                               &scaler_scratch_surfaces_[0])) {
    return false;
  }
  if (resample_count > 2) {
    const gfx::Size quarter_size = GetHalfSizeNoLessThan(half_size, dst_size);
    if (!d3d_utils::CreateOrReuseLockableSurface(
            device(), quarter_size, &scaler_scratch_surfaces_[1])) {
      return false;
    }
  }

  // Repeat downsampling the surface until its size becomes identical to
  // |dst_size|. We keep the factor of each downsampling no more than two
  // because using a factor more than two can introduce aliasing.
  RECT read_rect = src_subrect.ToRECT();
  gfx::Size write_size = half_size;
  int read_index = 1;
  int write_index = 0;
  const int last_pass = resample_count - 1;
  for (int pass = 0; pass <= last_pass; ++pass) {
    TRACE_EVENT0("gpu", "StretchRect");

    // The first pass reads the caller's surface; later passes read the
    // scratch surface written by the previous pass.
    IDirect3DSurface9* read_buffer = src_surface;
    if (pass != 0)
      read_buffer = scaler_scratch_surfaces_[read_index];

    // The last pass writes the caller's destination; earlier passes write a
    // scratch surface.
    IDirect3DSurface9* write_buffer = dst_surface;
    RECT write_rect = dst_rect.ToRECT();
    if (pass != last_pass) {
      write_buffer = scaler_scratch_surfaces_[write_index];
      write_rect = gfx::Rect(write_size).ToRECT();
    }

    const HRESULT hr = device()->StretchRect(read_buffer,
                                             &read_rect,
                                             write_buffer,
                                             &write_rect,
                                             D3DTEXF_LINEAR);
    if (FAILED(hr))
      return false;

    // What was just written becomes the input of the next pass.
    read_rect = write_rect;
    write_size = GetHalfSizeNoLessThan(write_size, dst_size);
    std::swap(read_index, write_index);
  }

  return true;
}
// Converts |src_surface| into three planes of size derived from |dst_size|.
// The output surfaces are obtained through AllocYUVBuffers() and returned
// in |*dst_y|, |*dst_u| and |*dst_v|. The conversion runs on the
// multiple-render-target path when the device supports it, and on the
// path without MRT otherwise. Returns false if allocation or conversion
// fails.
bool AcceleratedSurfaceTransformer::TransformRGBToYV12(
    IDirect3DTexture9* src_surface,
    const gfx::Size& dst_size,
    IDirect3DSurface9** dst_y,
    IDirect3DSurface9** dst_u,
    IDirect3DSurface9** dst_v) {
  gfx::Size packed_y_size;
  gfx::Size packed_uv_size;
  const bool allocated = AllocYUVBuffers(dst_size,
                                         &packed_y_size,
                                         &packed_uv_size,
                                         dst_y,
                                         dst_u,
                                         dst_v);
  if (!allocated)
    return false;

  if (!device_supports_multiple_render_targets()) {
    return TransformRGBToYV12_WithoutMRT(src_surface,
                                         dst_size,
                                         packed_y_size,
                                         packed_uv_size,
                                         *dst_y,
                                         *dst_u,
                                         *dst_v);
  }

  return TransformRGBToYV12_MRT(src_surface,
                                dst_size,
                                packed_y_size,
                                packed_uv_size,
                                *dst_y,
                                *dst_u,
                                *dst_v);
}
// Reads the pixels of |gpu_surface| into |dst|. Currently this always
// forwards to ReadByGetRenderTargetData() and returns its result.
bool AcceleratedSurfaceTransformer::ReadFast(IDirect3DSurface9* gpu_surface,
                                             uint8* dst,
                                             int dst_bytes_per_row,
                                             int dst_num_rows,
                                             int dst_stride) {
  // TODO(nick): Compared to GetRenderTargetData, LockRect+memcpy is 50% faster
  // on some systems, but 100x slower on others. We should have logic here to
  // choose the best path, probably by adaptively trying both and picking the
  // faster one. http://crbug.com/168532
  const bool read_ok = ReadByGetRenderTargetData(gpu_surface,
                                                 dst,
                                                 dst_bytes_per_row,
                                                 dst_num_rows,
                                                 dst_stride);
  return read_ok;
}
// Locks |gpu_surface| for reading and copies |dst_num_rows| rows of
// |dst_bytes_per_row| bytes each into |dst|, advancing the destination by
// |dst_stride| bytes and the source by the locked pitch after every row.
// The requested row count and row length are not checked against the
// surface dimensions. Returns false if the surface cannot be locked.
bool AcceleratedSurfaceTransformer::ReadByLockAndCopy(
    IDirect3DSurface9* gpu_surface,
    uint8* dst,
    int dst_bytes_per_row,
    int dst_num_rows,
    int dst_stride) {
  D3DLOCKED_RECT locked_rect;
  {
    TRACE_EVENT0("gpu", "LockRect");
    const HRESULT hr = gpu_surface->LockRect(
        &locked_rect, NULL, D3DLOCK_READONLY | D3DLOCK_NOSYSLOCK);
    if (FAILED(hr)) {
      LOG(ERROR) << "Failed to lock surface";
      return false;
    }
  }

  {
    TRACE_EVENT0("gpu", "memcpy");
    uint8* to = dst;
    uint8* from = reinterpret_cast<uint8*>(locked_rect.pBits);
    for (int rows_left = dst_num_rows; rows_left > 0; --rows_left) {
      memcpy(to, from, dst_bytes_per_row);
      from += locked_rect.Pitch;
      to += dst_stride;
    }
  }

  gpu_surface->UnlockRect();
  return true;
}
// Reads the pixels of |gpu_surface| into |dst| through GetRenderTargetData.
// A system-memory surface of the same size is created as the transfer
// target. When the layout of |dst| allows it, that surface is created
// directly on top of |dst|; otherwise the data is copied out of the
// system-memory surface row by row afterwards. Returns false if any
// Direct3D call fails.
bool AcceleratedSurfaceTransformer::ReadByGetRenderTargetData(
    IDirect3DSurface9* gpu_surface,
    uint8* dst,
    int dst_bytes_per_row,
    int dst_num_rows,
    int dst_stride) {
  HRESULT hr = 0;
  ScopedComPtr<IDirect3DSurface9> system_surface;
  const gfx::Size src_size = d3d_utils::GetSize(gpu_surface);

  // Depending on pitch and alignment, we might be able to wrap |dst| in an
  // offscreen- plain surface for a direct copy. That needs tightly packed
  // rows of exactly four bytes per source pixel, and room for every source
  // row.
  const bool rows_are_packed = (dst_stride == dst_bytes_per_row);
  const bool row_length_matches =
      (src_size.width() * 4 == dst_bytes_per_row);
  const bool has_enough_rows = (dst_num_rows >= src_size.height());
  const bool direct_copy =
      rows_are_packed && row_length_matches && has_enough_rows;

  {
    TRACE_EVENT0("gpu", "CreateOffscreenPlainSurface");
    HANDLE handle = reinterpret_cast<HANDLE>(dst);
    HANDLE* shared_handle = direct_copy ? &handle : NULL;
    hr = device()->CreateOffscreenPlainSurface(src_size.width(),
                                               src_size.height(),
                                               D3DFMT_A8R8G8B8,
                                               D3DPOOL_SYSTEMMEM,
                                               system_surface.Receive(),
                                               shared_handle);
    if (!SUCCEEDED(hr)) {
      LOG(ERROR) << "Failed to create offscreen plain surface.";
      return false;
    }
  }

  {
    TRACE_EVENT0("gpu", "GetRenderTargetData");
    hr = device()->GetRenderTargetData(gpu_surface, system_surface);
    if (FAILED(hr)) {
      LOG(ERROR) << "Failed GetRenderTargetData";
      return false;
    }
  }

  // We're done: |system_surface| is a wrapper around |dst|.
  if (direct_copy)
    return true;

  // Extra memcpy required from |system_surface| to |dst|.
  return ReadByLockAndCopy(system_surface, dst, dst_bytes_per_row,
                           dst_num_rows, dst_stride);
}
// Computes the packed plane sizes for an image of |dst_size| and creates or
// reuses the three scratch surfaces that hold the Y, U and V planes.
// |*y_size| and |*uv_size| receive the packed sizes; they are written even
// if a later allocation fails. On success |*dst_y|, |*dst_u| and |*dst_v|
// receive the surfaces and true is returned.
bool AcceleratedSurfaceTransformer::AllocYUVBuffers(
    const gfx::Size& dst_size,
    gfx::Size* y_size,
    gfx::Size* uv_size,
    IDirect3DSurface9** dst_y,
    IDirect3DSurface9** dst_u,
    IDirect3DSurface9** dst_v) {
  // Y is full height, packed into 4 components.
  const int packed_y_width = (dst_size.width() + 3) / 4;
  *y_size = gfx::Size(packed_y_width, dst_size.height());

  // U and V are half the size (rounded up) of Y.
  const int packed_uv_width = (y_size->width() + 1) / 2;
  const int packed_uv_height = (y_size->height() + 1) / 2;
  *uv_size = gfx::Size(packed_uv_width, packed_uv_height);

  const bool allocated =
      d3d_utils::CreateOrReuseLockableSurface(device(), *y_size,
                                              &y_scratch_surface_) &&
      d3d_utils::CreateOrReuseLockableSurface(device(), *uv_size,
                                              &u_scratch_surface_) &&
      d3d_utils::CreateOrReuseLockableSurface(device(), *uv_size,
                                              &v_scratch_surface_);
  if (!allocated)
    return false;

  // Hand out raw pointers by detaching copies of the smart pointers, so the
  // scratch members keep holding the surfaces.
  ScopedComPtr<IDirect3DSurface9> y_reference(y_scratch_surface_);
  ScopedComPtr<IDirect3DSurface9> u_reference(u_scratch_surface_);
  ScopedComPtr<IDirect3DSurface9> v_reference(v_scratch_surface_);
  *dst_y = y_reference.Detach();
  *dst_u = u_reference.Detach();
  *dst_v = v_reference.Detach();
  return true;
}
// Converts |src_surface| to YV12 in two passes, each writing to two render
// targets at once.
//
// Pass 1 renders into |dst_y| and an intermediate UV texture of
// |packed_y_size|. Pass 2 reads that texture and renders into |dst_u| and
// |dst_v|. |dst_size| is the quad size of pass 1 and decides whether pass 1
// filters. |packed_uv_size| is not used on this path.
//
// The render targets bound to slots 0 and 1 on entry are restored before
// returning. Returns false if the intermediate texture cannot be obtained
// or if the shaders for either pass cannot be selected.
bool AcceleratedSurfaceTransformer::TransformRGBToYV12_MRT(
    IDirect3DTexture9* src_surface,
    const gfx::Size& dst_size,
    const gfx::Size& packed_y_size,
    const gfx::Size& packed_uv_size,
    IDirect3DSurface9* dst_y,
    IDirect3DSurface9* dst_u,
    IDirect3DSurface9* dst_v) {
  TRACE_EVENT0("gpu", "RGBToYV12_MRT");

  ScopedRenderTargetRestorer color0_restorer(device(), 0);
  ScopedRenderTargetRestorer color1_restorer(device(), 1);

  // Create an intermediate surface to hold the UUVV values. This is color
  // target 1 for the first pass, and texture 0 for the second pass. Its
  // values are not read afterwards.
  ScopedComPtr<IDirect3DSurface9> uv_as_surface;
  if (!d3d_utils::CreateOrReuseRenderTargetTexture(device(),
                                                   packed_y_size,
                                                   &uv_scratch_texture_,
                                                   uv_as_surface.Receive())) {
    return false;
  }

  // Clamping is required if (dst_size.width() % 8 != 0), and for some
  // heights as well, so we set it always. Both passes rely on this.
  device()->SetSamplerState(0, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP);
  device()->SetSamplerState(0, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP);

  /////////////////////////////////////////
  // Pass 1: RGB --(scaled)--> YYYY + UUVV
  // Without the right shaders the draw would produce garbage, so stop here
  // rather than report success.
  if (!SetShaderCombo(RGB_TO_YV12_FAST__PASS_1_OF_2))
    return false;

  // Enable bilinear filtering if scaling is required. The filtering will take
  // place entirely in the first pass.
  if (d3d_utils::GetSize(src_surface) != dst_size) {
    device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR);
    device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR);
  } else {
    device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
    device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT);
  }

  device()->SetTexture(0, src_surface);
  device()->SetRenderTarget(0, dst_y);
  device()->SetRenderTarget(1, uv_as_surface);
  DrawScreenAlignedQuad(dst_size);

  /////////////////////////////////////////
  // Pass 2: UUVV -> UUUU + VVVV
  if (!SetShaderCombo(RGB_TO_YV12_FAST__PASS_2_OF_2)) {
    // Do not leave the source texture bound on the failure path.
    device()->SetTexture(0, NULL);
    return false;
  }

  // The second pass uses bilinear minification to achieve vertical scaling,
  // so enable it always.
  device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
  device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR);

  device()->SetTexture(0, uv_scratch_texture_);
  device()->SetRenderTarget(0, dst_u);
  device()->SetRenderTarget(1, dst_v);
  DrawScreenAlignedQuad(packed_y_size);

  // Clear surface references.
  device()->SetTexture(0, NULL);
  return true;
}
// Converts |src_surface| to YV12 in three passes, one per plane, for devices
// without multiple-render-target support.
//
// If the source does not already have |dst_size|, it is first scaled into a
// scratch texture with Copy(). The three passes then render that texture
// into |dst_y|, |dst_u| and |dst_v| in turn. |packed_y_size| and
// |packed_uv_size| are not used on this path.
//
// The render target bound to slot 0 on entry is restored before returning.
// Returns false if the scratch texture cannot be obtained, if the scaling
// copy fails, or if the shaders for any pass cannot be selected.
bool AcceleratedSurfaceTransformer::TransformRGBToYV12_WithoutMRT(
    IDirect3DTexture9* src_surface,
    const gfx::Size& dst_size,
    const gfx::Size& packed_y_size,
    const gfx::Size& packed_uv_size,
    IDirect3DSurface9* dst_y,
    IDirect3DSurface9* dst_u,
    IDirect3DSurface9* dst_v) {
  TRACE_EVENT0("gpu", "RGBToYV12_WithoutMRT");

  ScopedRenderTargetRestorer color0_restorer(device(), 0);

  ScopedComPtr<IDirect3DTexture9> scaled_src_surface;

  // If scaling is requested, do it to a temporary texture. The MRT path
  // gets a scale for free, so we need to support it here too (even though
  // it's an extra operation).
  if (d3d_utils::GetSize(src_surface) == dst_size) {
    scaled_src_surface = src_surface;
  } else {
    ScopedComPtr<IDirect3DSurface9> dst_level0;
    if (!d3d_utils::CreateOrReuseRenderTargetTexture(
            device(), dst_size, &uv_scratch_texture_, dst_level0.Receive())) {
      return false;
    }
    if (!Copy(src_surface, dst_level0, dst_size)) {
      return false;
    }
    scaled_src_surface = uv_scratch_texture_;
  }

  // Input texture is the same for all three passes.
  device()->SetTexture(0, scaled_src_surface);

  // Clamping is required if (dst_size.width() % 8 != 0), and for some
  // heights as well, so we set it always. All passes rely on this.
  device()->SetSamplerState(0, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP);
  device()->SetSamplerState(0, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP);

  /////////////////////
  // Pass 1: RGB -> Y.
  // If the shaders for a pass cannot be selected, the draw would run with
  // whatever shaders were active before; unbind the texture and fail
  // instead of reporting success.
  if (!SetShaderCombo(RGB_TO_YV12_SLOW__PASS_1_OF_3)) {
    device()->SetTexture(0, NULL);
    return false;
  }

  // Pass 1 just needs point sampling.
  device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
  device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT);
  device()->SetRenderTarget(0, dst_y);
  DrawScreenAlignedQuad(dst_size);

  // Passes 2 and 3 rely on bilinear minification to downsample U and V.
  device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
  device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR);

  /////////////////////
  // Pass 2: RGB -> U.
  if (!SetShaderCombo(RGB_TO_YV12_SLOW__PASS_2_OF_3)) {
    device()->SetTexture(0, NULL);
    return false;
  }
  device()->SetRenderTarget(0, dst_u);
  DrawScreenAlignedQuad(dst_size);

  /////////////////////
  // Pass 3: RGB -> V.
  if (!SetShaderCombo(RGB_TO_YV12_SLOW__PASS_3_OF_3)) {
    device()->SetTexture(0, NULL);
    return false;
  }
  device()->SetRenderTarget(0, dst_v);
  DrawScreenAlignedQuad(dst_size);

  // Clear surface references.
  device()->SetTexture(0, NULL);
  return true;
}
// Returns the device stored in |device_| as a raw pointer.
IDirect3DDevice9* AcceleratedSurfaceTransformer::device() {
  IDirect3DDevice9* const current_device = device_;
  return current_device;
}
// Makes the vertex and pixel shaders of |combo| current on the device.
// Returns false if the shaders cannot be created or if the device rejects
// either of them. The pixel shader is only set once the vertex shader has
// been set successfully.
bool AcceleratedSurfaceTransformer::SetShaderCombo(ShaderCombo combo) {
  // Compile shaders on first use, if needed. Normally the compilation should
  // already have happened at Init() time, but test code might force
  // us down an unusual path.
  if (!CompileShaderCombo(combo))
    return false;

  const bool vertex_shader_set =
      SUCCEEDED(device()->SetVertexShader(vertex_shaders_[combo]));
  if (!vertex_shader_set)
    return false;

  const bool pixel_shader_set =
      SUCCEEDED(device()->SetPixelShader(pixel_shaders_[combo]));
  return pixel_shader_set;
}