// ui/surface/accelerated_surface_transformer_win.cc - platform/external/chromium_org - Git at Google

 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include "ui/surface/accelerated_surface_transformer_win.h"

 #include <vector>

 #include "accelerated_surface_transformer_win_hlsl_compiled.h"
 #include "base/debug/trace_event.h"
 #include "base/memory/ref_counted.h"
 #include "base/metrics/histogram.h"
 #include "base/single_thread_task_runner.h"
 #include "base/synchronization/lock.h"
 #include "base/synchronization/waitable_event.h"
 #include "base/win/scoped_comptr.h"
 #include "ui/gfx/native_widget_types.h"
 #include "ui/gfx/rect.h"
 #include "ui/gfx/size.h"
 #include "ui/surface/d3d9_utils_win.h"
 #include "ui/surface/surface_export.h"

 using base::win::ScopedComPtr;
 using std::vector;
 using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsConvertRGBtoY8UV44;
 using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsConvertUV44toU2V2;
 using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsOneTexture;
 using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kVsFetch2Pixels;
 using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kVsFetch4Pixels;
 using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kVsOneTexture;
 using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kVsFetch4PixelsScale2;
 using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsConvertRGBtoY;
 using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsConvertRGBtoU;
 using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsConvertRGBtoV;

 namespace d3d_utils = ui_surface_d3d9_utils;

 namespace {

 // Per-vertex data handed to DrawPrimitiveUP: four position floats followed
 // by two texture-coordinate floats. The field order has to agree with the
 // offsets declared in |g_vertexElements|.
 struct Vertex {
   // Position.
   float x;
   float y;
   float z;
   float w;
   // Texture coordinate.
   float u;
   float v;
 };

 // Vertex declaration for |Vertex|: stream 0 holds a FLOAT4 position at byte
 // offset 0, then a FLOAT2 texture coordinate at byte offset 16.
 static const D3DVERTEXELEMENT9 g_vertexElements[] = {
   { 0,  0, D3DDECLTYPE_FLOAT4, 0, D3DDECLUSAGE_POSITION, 0 },
   { 0, 16, D3DDECLTYPE_FLOAT2, 0, D3DDECLUSAGE_TEXCOORD, 0 },
   D3DDECL_END()
 };

 class ScopedRenderTargetRestorer {
  public:
   ScopedRenderTargetRestorer(IDirect3DDevice9* device,
                              int render_target_id)
     : device_(device),
       target_id_(render_target_id) {
     device_->GetRenderTarget(target_id_, original_render_target_.Receive());
   }
   ~ScopedRenderTargetRestorer() {
     device_->SetRenderTarget(target_id_, original_render_target_);
   }
  private:
   ScopedComPtr<IDirect3DDevice9> device_;
   int target_id_;
   ScopedComPtr<IDirect3DSurface9> original_render_target_;
 };

 // Counts how many times |length| has to be halved (right shift by one) until
 // it is no longer greater than |limit|.
 int CountHalvings(int length, int limit) {
   int halvings = 0;
   for (; length > limit; length >>= 1)
     ++halvings;
   return halvings;
 }

 // Calculates the number of resampling passes necessary to transform
 // |src_subrect| into |dst_size| when each pass downsamples the image by a
 // factor of no more than 2. The result is never less than one.
 int GetResampleCount(const gfx::Rect& src_subrect,
                      const gfx::Size& dst_size) {
   // At least one copy is required, since the back buffer itself is not
   // lockable.
   const int kMinResampleCount = 1;

   const int horizontal = CountHalvings(src_subrect.width(), dst_size.width());
   const int vertical = CountHalvings(src_subrect.height(), dst_size.height());

   // The axis needing the most halvings dictates the pass count.
   const int needed = std::max(horizontal, vertical);
   return std::max(needed, kMinResampleCount);
 }

 // Halves each dimension of |size| (integer division), clamping each result so
 // that it is never smaller than the matching dimension of |min_size|.
 gfx::Size GetHalfSizeNoLessThan(const gfx::Size& size,
                                 const gfx::Size& min_size) {
   const int halved_width = size.width() / 2;
   const int halved_height = size.height() / 2;
   return gfx::Size(std::max(min_size.width(), halved_width),
                    std::max(min_size.height(), halved_height));
 }

 }  // namespace

 // Fills the per-ShaderCombo tables that map each pass to its vertex and pixel
 // shader program. No device work happens here.
 AcceleratedSurfaceTransformer::AcceleratedSurfaceTransformer()
     : device_supports_multiple_render_targets_(false),
       vertex_shader_sources_(),
       pixel_shader_sources_() {
   // Both tables below need one entry per ShaderCombo; revisit them whenever
   // the enum changes size.
   COMPILE_ASSERT(NUM_SHADERS == 6, must_initialize_shader_sources);

   // Vertex programs, one per pass.
   vertex_shader_sources_[ONE_TEXTURE] = kVsOneTexture;
   vertex_shader_sources_[RGB_TO_YV12_FAST__PASS_1_OF_2] = kVsFetch4Pixels;
   vertex_shader_sources_[RGB_TO_YV12_FAST__PASS_2_OF_2] = kVsFetch2Pixels;
   vertex_shader_sources_[RGB_TO_YV12_SLOW__PASS_1_OF_3] = kVsFetch4Pixels;
   vertex_shader_sources_[RGB_TO_YV12_SLOW__PASS_2_OF_3] = kVsFetch4PixelsScale2;
   vertex_shader_sources_[RGB_TO_YV12_SLOW__PASS_3_OF_3] = kVsFetch4PixelsScale2;

   // Pixel programs, one per pass.
   pixel_shader_sources_[ONE_TEXTURE] = kPsOneTexture;
   pixel_shader_sources_[RGB_TO_YV12_FAST__PASS_1_OF_2] = kPsConvertRGBtoY8UV44;
   pixel_shader_sources_[RGB_TO_YV12_FAST__PASS_2_OF_2] = kPsConvertUV44toU2V2;
   pixel_shader_sources_[RGB_TO_YV12_SLOW__PASS_1_OF_3] = kPsConvertRGBtoY;
   pixel_shader_sources_[RGB_TO_YV12_SLOW__PASS_2_OF_3] = kPsConvertRGBtoU;
   pixel_shader_sources_[RGB_TO_YV12_SLOW__PASS_3_OF_3] = kPsConvertRGBtoV;
 }

 // Initializes the transformer for |device|. If any step of DoInit() fails,
 // everything acquired so far is released and false is returned.
 bool AcceleratedSurfaceTransformer::Init(IDirect3DDevice9* device) {
   if (DoInit(device))
     return true;

   ReleaseAll();
   return false;
 }

 // Performs the actual initialization: records |device|, queries whether it
 // can render to at least two targets at once, compiles the shaders for the
 // path that will be used, and installs the vertex declaration. Returns false
 // at the first failing step without cleaning up; Init() handles that.
 bool AcceleratedSurfaceTransformer::DoInit(IDirect3DDevice9* device) {
   device_ = device;

   D3DCAPS9 device_caps;
   if (FAILED(device->GetDeviceCaps(&device_caps)))
     return false;

   device_supports_multiple_render_targets_ =
       (device_caps.NumSimultaneousRTs >= 2);

   // Log statistics about which paths we take.
   UMA_HISTOGRAM_BOOLEAN("GPU.AcceleratedSurfaceTransformerCanUseMRT",
                         device_supports_multiple_render_targets());

   // Force compilation of all shaders that could be used on this GPU. The
   // plain copy shader is needed on every device.
   if (!CompileShaderCombo(ONE_TEXTURE))
     return false;

   COMPILE_ASSERT(NUM_SHADERS == 6, must_compile_at_doinit);
   bool yv12_shaders_ready;
   if (device_supports_multiple_render_targets()) {
     yv12_shaders_ready =
         CompileShaderCombo(RGB_TO_YV12_FAST__PASS_1_OF_2) &&
         CompileShaderCombo(RGB_TO_YV12_FAST__PASS_2_OF_2);
   } else {
     yv12_shaders_ready =
         CompileShaderCombo(RGB_TO_YV12_SLOW__PASS_1_OF_3) &&
         CompileShaderCombo(RGB_TO_YV12_SLOW__PASS_2_OF_3) &&
         CompileShaderCombo(RGB_TO_YV12_SLOW__PASS_3_OF_3);
   }
   if (!yv12_shaders_ready)
     return false;

   // Every draw in this file uses the same vertex layout, so it is set once.
   ScopedComPtr<IDirect3DVertexDeclaration9> declaration;
   if (FAILED(device_->CreateVertexDeclaration(g_vertexElements,
                                               declaration.Receive()))) {
     return false;
   }
   return SUCCEEDED(device_->SetVertexDeclaration(declaration));
 }

 // Makes sure the vertex and pixel shader objects for |shader| exist, creating
 // them on |device_| if their slots are still empty. A newly created shader
 // object is also stored in every other slot that uses the same source.
 // Returns false if either creation call fails.
 bool AcceleratedSurfaceTransformer::CompileShaderCombo(
     ShaderCombo shader) {
   if (!vertex_shaders_[shader]) {
     const DWORD* vs_program =
         reinterpret_cast<const DWORD*>(vertex_shader_sources_[shader]);
     if (FAILED(device_->CreateVertexShader(
             vs_program, vertex_shaders_[shader].Receive()))) {
       return false;
     }

     // Share the new object with the other combos built from this source.
     for (int other = 0; other < NUM_SHADERS; ++other) {
       if (other == shader)
         continue;
       if (vertex_shader_sources_[other] == vertex_shader_sources_[shader])
         vertex_shaders_[other] = vertex_shaders_[shader];
     }
   }

   if (!pixel_shaders_[shader]) {
     const DWORD* ps_program =
         reinterpret_cast<const DWORD*>(pixel_shader_sources_[shader]);
     if (FAILED(device_->CreatePixelShader(
             ps_program, pixel_shaders_[shader].Receive()))) {
       return false;
     }

     // Share the new object with the other combos built from this source.
     for (int other = 0; other < NUM_SHADERS; ++other) {
       if (other == shader)
         continue;
       if (pixel_shader_sources_[other] == pixel_shader_sources_[shader])
         pixel_shaders_[other] = pixel_shaders_[shader];
     }
   }

   return true;
 }

 // Resets every shader, scratch texture, scratch surface and finally the
 // device pointer held by this object to NULL.
 void AcceleratedSurfaceTransformer::ReleaseAll() {
   for (int combo = 0; combo < NUM_SHADERS; ++combo) {
     vertex_shaders_[combo] = NULL;
     pixel_shaders_[combo] = NULL;
   }

   user_scratch_texture_ = NULL;
   uv_scratch_texture_ = NULL;
   y_scratch_surface_ = NULL;
   u_scratch_surface_ = NULL;
   v_scratch_surface_ = NULL;

   const size_t scaler_surface_count = arraysize(scaler_scratch_surfaces_);
   for (size_t index = 0; index < scaler_surface_count; ++index)
     scaler_scratch_surfaces_[index] = NULL;

   // The device goes last, after everything created from it.
   device_ = NULL;
 }
 // Calls Detach() on every shader, scratch texture, scratch surface and the
 // device, discarding the returned pointers.
 // NOTE(review): presumably this abandons the references without calling
 // Release(), unlike ReleaseAll() -- confirm against ScopedComPtr::Detach.
 void AcceleratedSurfaceTransformer::DetachAll() {
   for (int combo = 0; combo < NUM_SHADERS; ++combo) {
     vertex_shaders_[combo].Detach();
     pixel_shaders_[combo].Detach();
   }

   user_scratch_texture_.Detach();
   uv_scratch_texture_.Detach();
   y_scratch_surface_.Detach();
   u_scratch_surface_.Detach();
   v_scratch_surface_.Detach();

   const size_t scaler_surface_count = arraysize(scaler_scratch_surfaces_);
   for (size_t index = 0; index < scaler_surface_count; ++index)
     scaler_scratch_surfaces_[index].Detach();

   device_.Detach();
 }

 // Copies |src_texture| into |dst_surface| using a texture scale of (1, -1).
 // NOTE(review): the negative Y scale is presumably what flips the image
 // vertically in the vertex shader -- confirm against kVsOneTexture.
 bool AcceleratedSurfaceTransformer::CopyInverted(
     IDirect3DTexture9* src_texture,
     IDirect3DSurface9* dst_surface,
     const gfx::Size& dst_size) {
   const float kScaleX = 1.0f;
   const float kScaleY = -1.0f;
   return CopyWithTextureScale(src_texture, dst_surface, dst_size,
                               kScaleX, kScaleY);
 }

 // Copies |src_texture| into |dst_surface| using a texture scale of (1, 1).
 bool AcceleratedSurfaceTransformer::Copy(
     IDirect3DTexture9* src_texture,
     IDirect3DSurface9* dst_surface,
     const gfx::Size& dst_size) {
   const float kScaleX = 1.0f;
   const float kScaleY = 1.0f;
   return CopyWithTextureScale(src_texture, dst_surface, dst_size,
                               kScaleX, kScaleY);
 }

 // Draws |src_texture| into |dst_surface| with the ONE_TEXTURE shaders.
 //
 // |dst_size| sets both the viewport and the size passed to the quad draw.
 // |texture_scale_x| and |texture_scale_y| are uploaded as the kTextureScale
 // vertex shader constant. Point sampling is used when the source texture
 // already has size |dst_size|, linear filtering otherwise. Render target 0 is
 // restored when the function returns. Returns false only if the shader combo
 // cannot be selected; the results of the other device calls are not checked.
 bool AcceleratedSurfaceTransformer::CopyWithTextureScale(
     IDirect3DTexture9* src_texture,
     IDirect3DSurface9* dst_surface,
     const gfx::Size& dst_size,
     float texture_scale_x,
     float texture_scale_y) {

   if (!SetShaderCombo(ONE_TEXTURE))
     return false;

   // Set the kTextureScale vertex shader constant, which is assigned to
   // register 1.
   const float texture_scale[4] = {
     texture_scale_x, texture_scale_y, 0.0f, 0.0f
   };
   device()->SetVertexShaderConstantF(1, texture_scale, 1);

   ScopedRenderTargetRestorer render_target_restorer(device(), 0);
   device()->SetRenderTarget(0, dst_surface);
   device()->SetTexture(0, src_texture);

   // The first four viewport fields are DWORDs. Converting the int dimensions
   // explicitly avoids a narrowing conversion inside the braced initializer,
   // which C++11 and later reject.
   D3DVIEWPORT9 viewport = {
     0, 0,
     static_cast<DWORD>(dst_size.width()),
     static_cast<DWORD>(dst_size.height()),
     0.0f, 1.0f
   };
   device()->SetViewport(&viewport);

   // A same-size copy needs no filtering; any resize uses linear filtering.
   if (d3d_utils::GetSize(src_texture) == dst_size) {
     device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
     device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT);
   } else {
     device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR);
     device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR);
   }
   device()->SetSamplerState(0, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP);
   device()->SetSamplerState(0, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP);

   DrawScreenAlignedQuad(dst_size);

   // Clear surface references.
   device()->SetTexture(0, NULL);
   return true;
 }

 // Draws a two-triangle fan over the four corners (-1..+1 in x and y) using
 // whatever shaders, texture and render targets are currently set on the
 // device. |size| is uploaded as vertex shader constant c0
 // (|kRenderTargetSize|). The results of the device calls are not checked.
 void AcceleratedSurfaceTransformer::DrawScreenAlignedQuad(
     const gfx::Size& size) {
   // Explicit casts: int -> float inside a braced initializer is a narrowing
   // conversion, which C++11 and later reject.
   const float target_size[4] = {
     static_cast<float>(size.width()),
     static_cast<float>(size.height()),
     0.0f,
     0.0f
   };

   // Set the uniform shader constant |kRenderTargetSize|, which is bound
   // to register c0.
   device()->SetVertexShaderConstantF(0, target_size, 1);

   // We always send down the same vertices. The vertex program will take
   // care of doing resolution-dependent position adjustment.
   const Vertex vertices[] = {
     { -1.0f, +1.0f, 0.5f, 1.0f, 0.0f, 0.0f },
     { +1.0f, +1.0f, 0.5f, 1.0f, 1.0f, 0.0f },
     { +1.0f, -1.0f, 0.5f, 1.0f, 1.0f, 1.0f },
     { -1.0f, -1.0f, 0.5f, 1.0f, 0.0f, 1.0f }
   };

   device()->BeginScene();
   device()->DrawPrimitiveUP(D3DPT_TRIANGLEFAN,
                             2,  // Two triangles make up the quad.
                             vertices,
                             sizeof(vertices[0]));
   device()->EndScene();
 }

 // Creates or reuses |user_scratch_texture_| as a render-target texture of
 // |size|. On success, |*texture| receives the texture through an additional
 // ScopedComPtr copy that is then detached, and |*texture_level_zero| is
 // filled in by the d3d_utils helper. Returns false if the helper fails.
 bool AcceleratedSurfaceTransformer::GetIntermediateTexture(
     const gfx::Size& size,
     IDirect3DTexture9** texture,
     IDirect3DSurface9** texture_level_zero) {
   const bool texture_ready = d3d_utils::CreateOrReuseRenderTargetTexture(
       device(), size, &user_scratch_texture_, texture_level_zero);
   if (!texture_ready)
     return false;

   ScopedComPtr<IDirect3DTexture9> caller_copy(user_scratch_texture_);
   *texture = caller_copy.Detach();
   return true;
 }

 // Resizes |src_subrect| of |src_surface| into |dst_rect| of |dst_surface| by
 // chaining linearly filtered StretchRect calls. Returns false if either
 // rectangle is empty, a scratch surface cannot be obtained, or any
 // StretchRect fails.
 bool AcceleratedSurfaceTransformer::ResizeBilinear(
     IDirect3DSurface9* src_surface,
     const gfx::Rect& src_subrect,
     IDirect3DSurface9* dst_surface,
     const gfx::Rect& dst_rect) {
   COMPILE_ASSERT(arraysize(scaler_scratch_surfaces_) == 2, surface_count);

   const gfx::Size dst_size = dst_rect.size();
   if (src_subrect.size().IsEmpty() || dst_size.IsEmpty())
     return false;

   // Set up intermediate buffers needed for downsampling. The first scratch
   // surface is only needed with two or more passes, the second with three or
   // more.
   const int pass_count = GetResampleCount(src_subrect, dst_size);
   const gfx::Size half_size =
       GetHalfSizeNoLessThan(src_subrect.size(), dst_size);
   if (pass_count > 1 &&
       !d3d_utils::CreateOrReuseLockableSurface(device(),
                                                half_size,
                                                &scaler_scratch_surfaces_[0])) {
     return false;
   }
   if (pass_count > 2) {
     const gfx::Size quarter_size = GetHalfSizeNoLessThan(half_size, dst_size);
     if (!d3d_utils::CreateOrReuseLockableSurface(device(),
                                                  quarter_size,
                                                  &scaler_scratch_surfaces_[1])) {
       return false;
     }
   }

   // Repeat downsampling the surface until its size becomes identical to
   // |dst_size|. We keep the factor of each downsampling no more than two
   // because using a factor more than two can introduce aliasing.
   RECT read_rect = src_subrect.ToRECT();
   gfx::Size scratch_size = half_size;
   for (int pass = 0; pass < pass_count; ++pass) {
     TRACE_EVENT0("gpu", "StretchRect");
     const bool is_first_pass = (pass == 0);
     const bool is_last_pass = (pass == pass_count - 1);

     // The two scratch surfaces alternate: pass N writes slot (N & 1) and the
     // next pass reads it back from there.
     IDirect3DSurface9* from = is_first_pass
         ? src_surface
         : scaler_scratch_surfaces_[(pass + 1) & 1];

     IDirect3DSurface9* to;
     RECT write_rect;
     if (is_last_pass) {
       to = dst_surface;
       write_rect = dst_rect.ToRECT();
     } else {
       to = scaler_scratch_surfaces_[pass & 1];
       write_rect = gfx::Rect(scratch_size).ToRECT();
     }

     if (FAILED(device()->StretchRect(from,
                                      &read_rect,
                                      to,
                                      &write_rect,
                                      D3DTEXF_LINEAR))) {
       return false;
     }

     // What was just written is the input of the next pass.
     read_rect = write_rect;
     scratch_size = GetHalfSizeNoLessThan(scratch_size, dst_size);
   }

   return true;
 }

 // Converts |src_surface| to YV12 at |dst_size|. Allocates (or reuses) the Y,
 // U and V output surfaces, returning them through |dst_y|, |dst_u| and
 // |dst_v|, and then runs either the multiple-render-target path or the
 // fallback path depending on what the device supports. Returns false if
 // allocation or the conversion fails.
 bool AcceleratedSurfaceTransformer::TransformRGBToYV12(
     IDirect3DTexture9* src_surface,
     const gfx::Size& dst_size,
     IDirect3DSurface9** dst_y,
     IDirect3DSurface9** dst_u,
     IDirect3DSurface9** dst_v) {
   gfx::Size packed_y_size;
   gfx::Size packed_uv_size;
   const bool buffers_ready = AllocYUVBuffers(
       dst_size, &packed_y_size, &packed_uv_size, dst_y, dst_u, dst_v);
   if (!buffers_ready)
     return false;

   if (!device_supports_multiple_render_targets()) {
     return TransformRGBToYV12_WithoutMRT(src_surface,
                                          dst_size,
                                          packed_y_size,
                                          packed_uv_size,
                                          *dst_y,
                                          *dst_u,
                                          *dst_v);
   }

   return TransformRGBToYV12_MRT(src_surface,
                                 dst_size,
                                 packed_y_size,
                                 packed_uv_size,
                                 *dst_y,
                                 *dst_u,
                                 *dst_v);
 }

 // Reads |gpu_surface| back into |dst|. Currently this always forwards to
 // ReadByGetRenderTargetData() with the same arguments.
 bool AcceleratedSurfaceTransformer::ReadFast(IDirect3DSurface9* gpu_surface,
                                              uint8* dst,
                                              int dst_bytes_per_row,
                                              int dst_num_rows,
                                              int dst_stride) {
   // TODO(nick): Compared to GetRenderTargetData, LockRect+memcpy is 50% faster
   // on some systems, but 100x slower on others. We should have logic here to
   // choose the best path, probably by adaptively trying both and picking the
   // faster one. http://crbug.com/168532
   const bool read_ok = ReadByGetRenderTargetData(
       gpu_surface, dst, dst_bytes_per_row, dst_num_rows, dst_stride);
   return read_ok;
 }

 // Locks |gpu_surface| for reading and copies |dst_num_rows| rows of
 // |dst_bytes_per_row| bytes each into |dst|. Source rows advance by the
 // locked pitch and destination rows by |dst_stride|. Returns false if the
 // lock fails.
 bool AcceleratedSurfaceTransformer::ReadByLockAndCopy(
     IDirect3DSurface9* gpu_surface,
     uint8* dst,
     int dst_bytes_per_row,
     int dst_num_rows,
     int dst_stride) {
   D3DLOCKED_RECT locked_rect;
   {
     TRACE_EVENT0("gpu", "LockRect");
     const DWORD lock_flags = D3DLOCK_READONLY | D3DLOCK_NOSYSLOCK;
     if (FAILED(gpu_surface->LockRect(&locked_rect, NULL, lock_flags))) {
       LOG(ERROR) << "Failed to lock surface";
       return false;
     }
   }

   {
     TRACE_EVENT0("gpu", "memcpy");
     uint8* write_cursor = dst;
     uint8* read_cursor = reinterpret_cast<uint8*>(locked_rect.pBits);
     for (int rows_left = dst_num_rows; rows_left > 0; --rows_left) {
       memcpy(write_cursor, read_cursor, dst_bytes_per_row);
       read_cursor += locked_rect.Pitch;
       write_cursor += dst_stride;
     }
   }

   gpu_surface->UnlockRect();
   return true;
 }

 // Reads |gpu_surface| back through a system-memory offscreen plain surface
 // filled by GetRenderTargetData(). When the layout of |dst| allows it, the
 // system-memory surface is created around |dst| itself and no further copy
 // is made; otherwise the rows are copied out with ReadByLockAndCopy().
 // Returns false if surface creation, the readback, or the extra copy fails.
 bool AcceleratedSurfaceTransformer::ReadByGetRenderTargetData(
     IDirect3DSurface9* gpu_surface,
     uint8* dst,
     int dst_bytes_per_row,
     int dst_num_rows,
     int dst_stride) {
   const gfx::Size src_size = d3d_utils::GetSize(gpu_surface);

   // Depending on pitch and alignment, we might be able to wrap |dst| in an
   // offscreen plain surface for a direct copy.
   const bool rows_are_contiguous = (dst_stride == dst_bytes_per_row);
   const bool row_width_matches = (src_size.width() * 4 == dst_bytes_per_row);
   const bool has_enough_rows = (dst_num_rows >= src_size.height());
   const bool direct_copy =
       rows_are_contiguous && row_width_matches && has_enough_rows;

   ScopedComPtr<IDirect3DSurface9> system_surface;
   {
     TRACE_EVENT0("gpu", "CreateOffscreenPlainSurface");
     HANDLE handle = reinterpret_cast<HANDLE>(dst);
     HANDLE* shared_handle = direct_copy ? &handle : NULL;
     const HRESULT create_result =
         device()->CreateOffscreenPlainSurface(src_size.width(),
                                               src_size.height(),
                                               D3DFMT_A8R8G8B8,
                                               D3DPOOL_SYSTEMMEM,
                                               system_surface.Receive(),
                                               shared_handle);
     if (FAILED(create_result)) {
       LOG(ERROR) << "Failed to create offscreen plain surface.";
       return false;
     }
   }

   {
     TRACE_EVENT0("gpu", "GetRenderTargetData");
     if (FAILED(device()->GetRenderTargetData(gpu_surface, system_surface))) {
       LOG(ERROR) << "Failed GetRenderTargetData";
       return false;
     }
   }

   // We're done if |system_surface| is a wrapper around |dst|.
   if (direct_copy)
     return true;

   // Extra memcpy required from |system_surface| to |dst|.
   return ReadByLockAndCopy(system_surface, dst, dst_bytes_per_row,
                            dst_num_rows, dst_stride);
 }

 // Computes the packed plane sizes for |dst_size|, creates or reuses the three
 // scratch surfaces at those sizes, and returns them through |dst_y|, |dst_u|
 // and |dst_v| (each through an additional ScopedComPtr copy that is then
 // detached). |*y_size| and |*uv_size| are written even when a surface cannot
 // be obtained, in which case false is returned.
 bool AcceleratedSurfaceTransformer::AllocYUVBuffers(
     const gfx::Size& dst_size,
     gfx::Size* y_size,
     gfx::Size* uv_size,
     IDirect3DSurface9** dst_y,
     IDirect3DSurface9** dst_u,
     IDirect3DSurface9** dst_v) {
   // Y is full height, packed into 4 components: four values share one
   // texel, so the width is a quarter of the destination, rounded up.
   const int packed_y_width = (dst_size.width() + 3) / 4;
   *y_size = gfx::Size(packed_y_width, dst_size.height());

   // U and V are half the size (rounded up) of Y.
   const int packed_uv_width = (y_size->width() + 1) / 2;
   const int packed_uv_height = (y_size->height() + 1) / 2;
   *uv_size = gfx::Size(packed_uv_width, packed_uv_height);

   if (!d3d_utils::CreateOrReuseLockableSurface(device(), *y_size,
                                                &y_scratch_surface_) ||
       !d3d_utils::CreateOrReuseLockableSurface(device(), *uv_size,
                                                &u_scratch_surface_) ||
       !d3d_utils::CreateOrReuseLockableSurface(device(), *uv_size,
                                                &v_scratch_surface_)) {
     return false;
   }

   ScopedComPtr<IDirect3DSurface9> y_copy(y_scratch_surface_);
   ScopedComPtr<IDirect3DSurface9> u_copy(u_scratch_surface_);
   ScopedComPtr<IDirect3DSurface9> v_copy(v_scratch_surface_);
   *dst_y = y_copy.Detach();
   *dst_u = u_copy.Detach();
   *dst_v = v_copy.Detach();

   return true;
 }

 // Converts |src_surface| (RGB) into the Y, U and V surfaces using two passes
 // that each write two render targets at once.
 //
 // Pass 1 renders into |dst_y| and an intermediate UV texture of
 // |packed_y_size|; pass 2 samples that texture and renders into |dst_u| and
 // |dst_v|. Render targets 0 and 1 are restored on every exit path, and
 // texture stage 0 is unbound before returning once it has been bound.
 // Returns false if the intermediate texture cannot be obtained or if either
 // shader combo cannot be selected. |packed_uv_size| is not used by this path.
 bool AcceleratedSurfaceTransformer::TransformRGBToYV12_MRT(
     IDirect3DTexture9* src_surface,
     const gfx::Size& dst_size,
     const gfx::Size& packed_y_size,
     const gfx::Size& packed_uv_size,
     IDirect3DSurface9* dst_y,
     IDirect3DSurface9* dst_u,
     IDirect3DSurface9* dst_v) {
   TRACE_EVENT0("gpu", "RGBToYV12_MRT");

   ScopedRenderTargetRestorer color0_restorer(device(), 0);
   ScopedRenderTargetRestorer color1_restorer(device(), 1);

   // Create an intermediate surface to hold the UUVV values. This is color
   // target 1 for the first pass, and texture 0 for the second pass. Its
   // values are not read afterwards.
   ScopedComPtr<IDirect3DSurface9> uv_as_surface;
   if (!d3d_utils::CreateOrReuseRenderTargetTexture(device(),
                                                    packed_y_size,
                                                    &uv_scratch_texture_,
                                                    uv_as_surface.Receive())) {
     return false;
   }

   // Clamping is required if (dst_size.width() % 8 != 0) or, per the original
   // note, if (dst_size.height != 0), so we set it always. Both passes rely on
   // this.
   // NOTE(review): the height condition above looks truncated (perhaps an
   // odd-height check) -- confirm.
   device()->SetSamplerState(0, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP);
   device()->SetSamplerState(0, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP);

   /////////////////////////////////////////
   // Pass 1: RGB --(scaled)--> YYYY + UUVV
   // Drawing with whatever shaders happened to be bound would produce garbage
   // while still reporting success, so a failed selection aborts the
   // conversion. Nothing has been bound to texture stage 0 yet.
   if (!SetShaderCombo(RGB_TO_YV12_FAST__PASS_1_OF_2))
     return false;

   // Enable bilinear filtering if scaling is required. The filtering will take
   // place entirely in the first pass.
   if (d3d_utils::GetSize(src_surface) != dst_size) {
     device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR);
     device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR);
   } else {
     device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
     device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT);
   }

   device()->SetTexture(0, src_surface);
   device()->SetRenderTarget(0, dst_y);
   device()->SetRenderTarget(1, uv_as_surface);
   DrawScreenAlignedQuad(dst_size);

   /////////////////////////////////////////
   // Pass 2: UUVV -> UUUU + VVVV
   if (!SetShaderCombo(RGB_TO_YV12_FAST__PASS_2_OF_2)) {
     // |src_surface| is still bound from pass 1; drop it before bailing out.
     device()->SetTexture(0, NULL);
     return false;
   }

   // The second pass uses bilinear minification to achieve vertical scaling,
   // so enable it always.
   device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
   device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR);

   device()->SetTexture(0, uv_scratch_texture_);
   device()->SetRenderTarget(0, dst_u);
   device()->SetRenderTarget(1, dst_v);
   DrawScreenAlignedQuad(packed_y_size);

   // Clear surface references.
   device()->SetTexture(0, NULL);
   return true;
 }

 // Converts |src_surface| (RGB) into the Y, U and V surfaces with three
 // single-target passes; TransformRGBToYV12() picks this path when the device
 // does not support multiple render targets.
 //
 // If |src_surface| is not already |dst_size|, it is first scaled into
 // |uv_scratch_texture_| with Copy(). Render target 0 is restored on every
 // exit path, and texture stage 0 is unbound before returning once it has
 // been bound. Returns false if the scaled copy cannot be produced or if any
 // of the three shader combos cannot be selected. |packed_y_size| and
 // |packed_uv_size| are not used by this path.
 bool AcceleratedSurfaceTransformer::TransformRGBToYV12_WithoutMRT(
     IDirect3DTexture9* src_surface,
     const gfx::Size& dst_size,
     const gfx::Size& packed_y_size,
     const gfx::Size& packed_uv_size,
     IDirect3DSurface9* dst_y,
     IDirect3DSurface9* dst_u,
     IDirect3DSurface9* dst_v) {
   TRACE_EVENT0("gpu", "RGBToYV12_WithoutMRT");

   ScopedRenderTargetRestorer color0_restorer(device(), 0);

   ScopedComPtr<IDirect3DTexture9> scaled_src_surface;

   // If scaling is requested, do it to a temporary texture. The MRT path
   // gets a scale for free, so we need to support it here too (even though
   // it's an extra operation).
   if (d3d_utils::GetSize(src_surface) == dst_size) {
     scaled_src_surface = src_surface;
   } else {
     ScopedComPtr<IDirect3DSurface9> dst_level0;
     if (!d3d_utils::CreateOrReuseRenderTargetTexture(
             device(), dst_size, &uv_scratch_texture_, dst_level0.Receive())) {
       return false;
     }
     if (!Copy(src_surface, dst_level0, dst_size)) {
       return false;
     }
     scaled_src_surface = uv_scratch_texture_;
   }

   // Input texture is the same for all three passes.
   device()->SetTexture(0, scaled_src_surface);

   // Clamping is required if (dst_size.width() % 8 != 0) or, per the original
   // note, if (dst_size.height != 0), so we set it always. All passes rely on
   // this.
   // NOTE(review): the height condition above looks truncated (perhaps an
   // odd-height check) -- confirm.
   device()->SetSamplerState(0, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP);
   device()->SetSamplerState(0, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP);

   /////////////////////
   // Pass 1: RGB -> Y.
   // For every pass: drawing with whatever shaders happened to be bound would
   // produce garbage while still reporting success, so a failed selection
   // unbinds the input texture and aborts the conversion.
   if (!SetShaderCombo(RGB_TO_YV12_SLOW__PASS_1_OF_3)) {
     device()->SetTexture(0, NULL);
     return false;
   }

   // Pass 1 just needs point sampling.
   device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
   device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT);

   device()->SetRenderTarget(0, dst_y);
   DrawScreenAlignedQuad(dst_size);

   // Passes 2 and 3 rely on bilinear minification to downsample U and V.
   device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
   device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR);

   /////////////////////
   // Pass 2: RGB -> U.
   if (!SetShaderCombo(RGB_TO_YV12_SLOW__PASS_2_OF_3)) {
     device()->SetTexture(0, NULL);
     return false;
   }
   device()->SetRenderTarget(0, dst_u);
   DrawScreenAlignedQuad(dst_size);

   /////////////////////
   // Pass 3: RGB -> V.
   if (!SetShaderCombo(RGB_TO_YV12_SLOW__PASS_3_OF_3)) {
     device()->SetTexture(0, NULL);
     return false;
   }
   device()->SetRenderTarget(0, dst_v);
   DrawScreenAlignedQuad(dst_size);

   // Clear surface references.
   device()->SetTexture(0, NULL);
   return true;
 }

 // Returns the device pointer currently stored in |device_|.
 IDirect3DDevice9* AcceleratedSurfaceTransformer::device() {
   IDirect3DDevice9* current_device = device_;
   return current_device;
 }

 // Selects the vertex and pixel shader of |combo| on the device, creating
 // them first if necessary. Returns false if creation fails or if the device
 // rejects either shader.
 bool AcceleratedSurfaceTransformer::SetShaderCombo(ShaderCombo combo) {
   // Compile shaders on first use, if needed. Normally the compilation should
   // already have happened at Init() time, but test code might force
   // us down an unusual path.
   if (!CompileShaderCombo(combo))
     return false;

   if (FAILED(device()->SetVertexShader(vertex_shaders_[combo])))
     return false;

   return SUCCEEDED(device()->SetPixelShader(pixel_shaders_[combo]));
 }