Roll minigbm forward to upstream's master

BUG: 123765087
BUG: 123764798 (Milestone)
BUG: 77276633 (OKR)
Test: builds
Change-Id: Id9e871dc6cd106339f944604695ba965f80d563d
diff --git a/amdgpu.c b/amdgpu.c
index ee556bc..3dbe0e8 100644
--- a/amdgpu.c
+++ b/amdgpu.c
@@ -37,9 +37,8 @@
 						  DRM_FORMAT_RGB565, DRM_FORMAT_XBGR8888,
 						  DRM_FORMAT_XRGB8888 };
 
-const static uint32_t texture_source_formats[] = { DRM_FORMAT_BGR888, DRM_FORMAT_GR88,
-						   DRM_FORMAT_R8,     DRM_FORMAT_NV21,
-						   DRM_FORMAT_NV12,   DRM_FORMAT_YVU420_ANDROID };
+const static uint32_t texture_source_formats[] = { DRM_FORMAT_GR88, DRM_FORMAT_R8, DRM_FORMAT_NV21,
+						   DRM_FORMAT_NV12, DRM_FORMAT_YVU420_ANDROID };
 
 static int amdgpu_init(struct driver *drv)
 {
@@ -79,6 +78,9 @@
 	drv_add_combinations(drv, texture_source_formats, ARRAY_SIZE(texture_source_formats),
 			     &metadata, BO_USE_TEXTURE_MASK);
 
+	/* Android CTS tests require this. */
+	drv_add_combination(drv, DRM_FORMAT_BGR888, &metadata, BO_USE_SW_MASK);
+
 	/* Linear formats supported by display. */
 	drv_modify_combination(drv, DRM_FORMAT_ARGB8888, &metadata, BO_USE_CURSOR | BO_USE_SCANOUT);
 	drv_modify_combination(drv, DRM_FORMAT_XRGB8888, &metadata, BO_USE_CURSOR | BO_USE_SCANOUT);
@@ -86,7 +88,8 @@
 
 	/* YUV formats for camera and display. */
 	drv_modify_combination(drv, DRM_FORMAT_NV12, &metadata,
-			       BO_USE_CAMERA_READ | BO_USE_CAMERA_WRITE | BO_USE_SCANOUT);
+			       BO_USE_CAMERA_READ | BO_USE_CAMERA_WRITE | BO_USE_SCANOUT |
+				   BO_USE_HW_VIDEO_DECODER);
 
 	drv_modify_combination(drv, DRM_FORMAT_NV21, &metadata, BO_USE_SCANOUT);
 
@@ -134,20 +137,30 @@
 	uint32_t plane, stride;
 	struct combination *combo;
 	union drm_amdgpu_gem_create gem_create;
-	struct amdgpu_priv *priv = bo->drv->priv;
 
 	combo = drv_get_combination(bo->drv, format, use_flags);
 	if (!combo)
 		return -EINVAL;
 
-	if (combo->metadata.tiling == TILE_TYPE_DRI)
+	if (combo->metadata.tiling == TILE_TYPE_DRI) {
+#ifdef __ANDROID__
+		/*
+		 * Currently, the gralloc API doesn't differentiate between allocation time and map
+		 * time strides. A workaround for amdgpu DRI buffers is to always align to 256 at
+		 * allocation time.
+		 *
+		 * See b/115946221 and b/117942643.
+		 */
+		if (use_flags & (BO_USE_SW_MASK)) {
+			uint32_t bytes_per_pixel = drv_bytes_per_pixel_from_format(format, 0);
+			width = ALIGN(width, 256 / bytes_per_pixel);
+		}
+#endif
 		return dri_bo_create(bo, width, height, format, use_flags);
+	}
 
 	stride = drv_stride_from_format(format, width, 0);
-	if (format == DRM_FORMAT_YVU420_ANDROID)
-		stride = ALIGN(stride, 128);
-	else
-		stride = ALIGN(stride, 64);
+	stride = ALIGN(stride, 256);
 
 	drv_bo_from_format(bo, stride, height, format);
 
@@ -156,19 +169,13 @@
 	gem_create.in.alignment = 256;
 	gem_create.in.domain_flags = 0;
 
-	if (use_flags & (BO_USE_LINEAR | BO_USE_SW))
+	if (use_flags & (BO_USE_LINEAR | BO_USE_SW_MASK))
 		gem_create.in.domain_flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
 
 	gem_create.in.domains = AMDGPU_GEM_DOMAIN_GTT;
 	if (!(use_flags & (BO_USE_SW_READ_OFTEN | BO_USE_SCANOUT)))
 		gem_create.in.domain_flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
 
-	/* If drm_version >= 21 everything exposes explicit synchronization primitives
-	   and chromeos/arc++ will use them. Disable implicit synchronization. */
-	if (priv->drm_version >= 21) {
-		gem_create.in.domain_flags |= AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
-	}
-
 	/* Allocate the buffer with the preferred heap. */
 	ret = drmCommandWriteRead(drv_get_fd(bo->drv), DRM_AMDGPU_GEM_CREATE, &gem_create,
 				  sizeof(gem_create));
@@ -233,6 +240,32 @@
 		return munmap(vma->addr, vma->length);
 }
 
+static int amdgpu_bo_invalidate(struct bo *bo, struct mapping *mapping)
+{
+	int ret;
+	union drm_amdgpu_gem_wait_idle wait_idle;
+
+	if (bo->priv)
+		return 0;
+
+	memset(&wait_idle, 0, sizeof(wait_idle));
+	wait_idle.in.handle = bo->handles[0].u32;
+	wait_idle.in.timeout = AMDGPU_TIMEOUT_INFINITE;
+
+	ret = drmCommandWriteRead(bo->drv->fd, DRM_AMDGPU_GEM_WAIT_IDLE, &wait_idle,
+				  sizeof(wait_idle));
+
+	if (ret < 0) {
+		drv_log("DRM_AMDGPU_GEM_WAIT_IDLE failed with %d\n", ret);
+		return ret;
+	}
+
+	if (ret == 0 && wait_idle.out.status)
+		drv_log("DRM_AMDGPU_GEM_WAIT_IDLE BO is busy\n");
+
+	return 0;
+}
+
 static uint32_t amdgpu_resolve_format(uint32_t format, uint64_t use_flags)
 {
 	switch (format) {
@@ -258,6 +291,7 @@
 	.bo_import = amdgpu_import_bo,
 	.bo_map = amdgpu_map_bo,
 	.bo_unmap = amdgpu_unmap_bo,
+	.bo_invalidate = amdgpu_bo_invalidate,
 	.resolve_format = amdgpu_resolve_format,
 };
 
diff --git a/dri.c b/dri.c
index ae491bb..a9c1ed7 100644
--- a/dri.c
+++ b/dri.c
@@ -9,6 +9,7 @@
 #include <assert.h>
 #include <dlfcn.h>
 #include <errno.h>
+#include <fcntl.h>
 #include <stdbool.h>
 #include <stdio.h>
 #include <string.h>
@@ -87,6 +88,21 @@
 }
 
 /*
+ * Closes a GEM handle on the given DRM fd. A failure is logged but not reported to the caller.
+ */
+static void close_gem_handle(uint32_t handle, int fd)
+{
+	struct drm_gem_close gem_close;
+	int ret = 0;
+
+	memset(&gem_close, 0, sizeof(gem_close));
+	gem_close.handle = handle;
+	ret = drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
+	if (ret)
+		drv_log("DRM_IOCTL_GEM_CLOSE failed (handle=%x) error %d\n", handle, ret);
+}
+
+/*
  * The caller is responsible for setting drv->priv to a structure that derives from dri_driver.
  */
 int dri_init(struct driver *drv, const char *dri_so_path, const char *driver_suffix)
@@ -96,9 +112,14 @@
 	const __DRIextension *loader_extensions[] = { NULL };
 
 	struct dri_driver *dri = drv->priv;
+
+	dri->fd = open(drmGetRenderDeviceNameFromFd(drv_get_fd(drv)), O_RDWR);
+	if (dri->fd < 0)
+		return -ENODEV;
+
 	dri->driver_handle = dlopen(dri_so_path, RTLD_NOW | RTLD_GLOBAL);
 	if (!dri->driver_handle)
-		return -ENODEV;
+		goto close_dri_fd;
 
 	snprintf(fname, sizeof(fname), __DRI_DRIVER_GET_EXTENSIONS "_%s", driver_suffix);
 	get_extensions = dlsym(dri->driver_handle, fname);
@@ -118,7 +139,7 @@
 			      (const __DRIextension **)&dri->dri2_extension))
 		goto free_handle;
 
-	dri->device = dri->dri2_extension->createNewScreen2(0, drv_get_fd(drv), loader_extensions,
+	dri->device = dri->dri2_extension->createNewScreen2(0, dri->fd, loader_extensions,
 							    dri->extensions, &dri->configs, NULL);
 	if (!dri->device)
 		goto free_handle;
@@ -146,6 +167,8 @@
 free_handle:
 	dlclose(dri->driver_handle);
 	dri->driver_handle = NULL;
+close_dri_fd:
+	close(dri->fd);
 	return -ENODEV;
 }
 
@@ -160,6 +183,7 @@
 	dri->core_extension->destroyScreen(dri->device);
 	dlclose(dri->driver_handle);
 	dri->driver_handle = NULL;
+	close(dri->fd);
 }
 
 int dri_bo_create(struct bo *bo, uint32_t width, uint32_t height, uint32_t format,
@@ -194,12 +218,12 @@
 
 	if (!dri->image_extension->queryImage(bo->priv, __DRI_IMAGE_ATTRIB_STRIDE, &stride)) {
 		ret = -errno;
-		goto free_image;
+		goto close_handle;
 	}
 
 	if (!dri->image_extension->queryImage(bo->priv, __DRI_IMAGE_ATTRIB_OFFSET, &offset)) {
 		ret = -errno;
-		goto free_image;
+		goto close_handle;
 	}
 
 	bo->strides[0] = stride;
@@ -208,6 +232,8 @@
 	bo->total_size = offset + bo->sizes[0];
 	return 0;
 
+close_handle:
+	close_gem_handle(bo->handles[0].u32, bo->drv->fd);
 free_image:
 	dri->image_extension->destroyImage(bo->priv);
 	return ret;
@@ -243,6 +269,7 @@
 	struct dri_driver *dri = bo->drv->priv;
 
 	assert(bo->priv);
+	close_gem_handle(bo->handles[0].u32, bo->drv->fd);
 	dri->image_extension->destroyImage(bo->priv);
 	bo->priv = NULL;
 	return 0;
diff --git a/dri.h b/dri.h
index d01bc5d..f79de99 100644
--- a/dri.h
+++ b/dri.h
@@ -14,6 +14,7 @@
 #include "drv.h"
 
 struct dri_driver {
+	int fd;
 	void *driver_handle;
 	__DRIscreen *device;
 	__DRIcontext *context; /* Needed for map/unmap operations. */
diff --git a/drv.c b/drv.c
index bc1f782..09f303c 100644
--- a/drv.c
+++ b/drv.c
@@ -111,7 +111,8 @@
 #ifdef DRV_VC4
 		&backend_vc4,
 #endif
-		&backend_vgem,     &backend_virtio_gpu,
+		&backend_vgem,
+		&backend_virtio_gpu,
 	};
 
 	for (i = 0; i < ARRAY_SIZE(backend_list); i++)
@@ -361,6 +362,12 @@
 	}
 
 	for (plane = 0; plane < bo->num_planes; plane++) {
+		pthread_mutex_lock(&bo->drv->driver_lock);
+		drv_increment_reference_count(bo->drv, bo, plane);
+		pthread_mutex_unlock(&bo->drv->driver_lock);
+	}
+
+	for (plane = 0; plane < bo->num_planes; plane++) {
 		bo->strides[plane] = data->strides[plane];
 		bo->offsets[plane] = data->offsets[plane];
 		bo->format_modifiers[plane] = data->format_modifiers[plane];
diff --git a/drv_priv.h b/drv_priv.h
index 719cd35..d1369f0 100644
--- a/drv_priv.h
+++ b/drv_priv.h
@@ -85,12 +85,8 @@
 	                    BO_USE_SW_READ_OFTEN | BO_USE_SW_WRITE_OFTEN | \
                             BO_USE_SW_READ_RARELY | BO_USE_SW_WRITE_RARELY | BO_USE_TEXTURE
 
-#define BO_USE_SW BO_USE_SW_READ_OFTEN | BO_USE_SW_WRITE_OFTEN | \
-	    BO_USE_SW_READ_RARELY | BO_USE_SW_WRITE_RARELY
-
-#define BO_USE_SW_OFTEN BO_USE_SW_READ_OFTEN | BO_USE_SW_WRITE_OFTEN
-
-#define BO_USE_SW_RARELY BO_USE_SW_READ_RARELY | BO_USE_SW_WRITE_RARELY
+#define BO_USE_SW_MASK BO_USE_SW_READ_OFTEN | BO_USE_SW_WRITE_OFTEN | \
+		       BO_USE_SW_READ_RARELY | BO_USE_SW_WRITE_RARELY
 
 #ifndef DRM_FORMAT_MOD_LINEAR
 #define DRM_FORMAT_MOD_LINEAR DRM_FORMAT_MOD_NONE
diff --git a/gbm.c b/gbm.c
index c12c269..dd6013c 100644
--- a/gbm.c
+++ b/gbm.c
@@ -265,7 +265,7 @@
 
 PUBLIC uint32_t gbm_bo_get_stride(struct gbm_bo *bo)
 {
-	return gbm_bo_get_plane_stride(bo, 0);
+	return gbm_bo_get_stride_for_plane(bo, 0);
 }
 
 PUBLIC uint32_t gbm_bo_get_stride_or_tiling(struct gbm_bo *bo)
@@ -280,6 +280,11 @@
 
 PUBLIC uint64_t gbm_bo_get_format_modifier(struct gbm_bo *bo)
 {
+	return gbm_bo_get_modifier(bo);
+}
+
+PUBLIC uint64_t gbm_bo_get_modifier(struct gbm_bo *bo)
+{
 	return gbm_bo_get_plane_format_modifier(bo, 0);
 }
 
@@ -290,7 +295,7 @@
 
 PUBLIC union gbm_bo_handle gbm_bo_get_handle(struct gbm_bo *bo)
 {
-	return gbm_bo_get_plane_handle(bo, 0);
+	return gbm_bo_get_handle_for_plane(bo, 0);
 }
 
 PUBLIC int gbm_bo_get_fd(struct gbm_bo *bo)
@@ -300,11 +305,21 @@
 
 PUBLIC size_t gbm_bo_get_num_planes(struct gbm_bo *bo)
 {
+	return gbm_bo_get_plane_count(bo);
+}
+
+PUBLIC size_t gbm_bo_get_plane_count(struct gbm_bo *bo)
+{
 	return drv_bo_get_num_planes(bo->bo);
 }
 
 PUBLIC union gbm_bo_handle gbm_bo_get_plane_handle(struct gbm_bo *bo, size_t plane)
 {
+	return gbm_bo_get_handle_for_plane(bo, plane);
+}
+
+PUBLIC union gbm_bo_handle gbm_bo_get_handle_for_plane(struct gbm_bo *bo, size_t plane)
+{
 	return (union gbm_bo_handle)drv_bo_get_plane_handle(bo->bo, plane).u64;
 }
 
@@ -315,6 +330,11 @@
 
 PUBLIC uint32_t gbm_bo_get_plane_offset(struct gbm_bo *bo, size_t plane)
 {
+	return gbm_bo_get_offset(bo, plane);
+}
+
+PUBLIC uint32_t gbm_bo_get_offset(struct gbm_bo *bo, size_t plane)
+{
 	return drv_bo_get_plane_offset(bo->bo, plane);
 }
 
@@ -325,6 +345,11 @@
 
 PUBLIC uint32_t gbm_bo_get_plane_stride(struct gbm_bo *bo, size_t plane)
 {
+	return gbm_bo_get_stride_for_plane(bo, plane);
+}
+
+PUBLIC uint32_t gbm_bo_get_stride_for_plane(struct gbm_bo *bo, size_t plane)
+{
 	return drv_bo_get_plane_stride(bo->bo, plane);
 }
 
diff --git a/gbm.h b/gbm.h
index ce05ce3..68a34c5 100644
--- a/gbm.h
+++ b/gbm.h
@@ -35,6 +35,10 @@
 
 #define __GBM__ 1
 
+#ifndef MINIGBM
+#define MINIGBM
+#endif
+
 #include <stddef.h>
 #include <stdint.h>
 
@@ -376,9 +380,13 @@
 uint32_t
 gbm_bo_get_format(struct gbm_bo *bo);
 
+/* Deprecated */
 uint64_t
 gbm_bo_get_format_modifier(struct gbm_bo *bo);
 
+uint64_t
+gbm_bo_get_modifier(struct gbm_bo *bo);
+
 struct gbm_device *
 gbm_bo_get_device(struct gbm_bo *bo);
 
@@ -388,24 +396,40 @@
 int
 gbm_bo_get_fd(struct gbm_bo *bo);
 
+/* Deprecated */
 size_t
 gbm_bo_get_num_planes(struct gbm_bo *bo);
 
+size_t
+gbm_bo_get_plane_count(struct gbm_bo *bo);
+
+/* Deprecated */
 union gbm_bo_handle
 gbm_bo_get_plane_handle(struct gbm_bo *bo, size_t plane);
 
+union gbm_bo_handle
+gbm_bo_get_handle_for_plane(struct gbm_bo* bo, size_t plane);
+
 int
 gbm_bo_get_plane_fd(struct gbm_bo *bo, size_t plane);
 
+/* Deprecated */
 uint32_t
 gbm_bo_get_plane_offset(struct gbm_bo *bo, size_t plane);
 
 uint32_t
-gbm_bo_get_plane_size(struct gbm_bo *bo, size_t plane);
+gbm_bo_get_offset(struct gbm_bo *bo, size_t plane);
 
 uint32_t
+gbm_bo_get_plane_size(struct gbm_bo *bo, size_t plane);
+
+/* Deprecated */
+uint32_t
 gbm_bo_get_plane_stride(struct gbm_bo *bo, size_t plane);
 
+uint32_t
+gbm_bo_get_stride_for_plane(struct gbm_bo *bo, size_t plane);
+
 uint64_t
 gbm_bo_get_plane_format_modifier(struct gbm_bo *bo, size_t plane);
 
diff --git a/helpers.c b/helpers.c
index 4fabfa9..6dbc7ce 100644
--- a/helpers.c
+++ b/helpers.c
@@ -261,16 +261,24 @@
 
 	aligned_width = width;
 	aligned_height = height;
-	if (format == DRM_FORMAT_YVU420_ANDROID) {
-		/*
-		 * Align width to 32 pixels, so chroma strides are 16 bytes as
-		 * Android requires.
-		 */
+	switch (format) {
+	case DRM_FORMAT_YVU420_ANDROID:
+		/* Align width to 32 pixels, so chroma strides are 16 bytes as
+		 * Android requires. */
 		aligned_width = ALIGN(width, 32);
-	}
-
-	if (format == DRM_FORMAT_YVU420_ANDROID || format == DRM_FORMAT_YVU420) {
+		/* Adjust the height to include room for chroma planes.
+		 *
+		 * HAL_PIXEL_FORMAT_YV12 requires that the buffer's height not
+		 * be aligned. */
+		aligned_height = 3 * DIV_ROUND_UP(bo->height, 2);
+		break;
+	case DRM_FORMAT_YVU420:
+	case DRM_FORMAT_NV12:
+		/* Adjust the height to include room for chroma planes */
 		aligned_height = 3 * DIV_ROUND_UP(height, 2);
+		break;
+	default:
+		break;
 	}
 
 	memset(&create_dumb, 0, sizeof(create_dumb));
@@ -368,12 +376,6 @@
 		bo->handles[plane].u32 = prime_handle.handle;
 	}
 
-	for (plane = 0; plane < bo->num_planes; plane++) {
-		pthread_mutex_lock(&bo->drv->driver_lock);
-		drv_increment_reference_count(bo->drv, bo, plane);
-		pthread_mutex_unlock(&bo->drv->driver_lock);
-	}
-
 	return 0;
 }
 
@@ -479,14 +481,14 @@
 		drmHashInsert(drv->buffer_table, bo->handles[plane].u32, (void *)(num - 1));
 }
 
-uint32_t drv_log_base2(uint32_t value)
+void drv_add_combination(struct driver *drv, const uint32_t format,
+			 struct format_metadata *metadata, uint64_t use_flags)
 {
-	int ret = 0;
+	struct combination combo = { .format = format,
+				     .metadata = *metadata,
+				     .use_flags = use_flags };
 
-	while (value >>= 1)
-		++ret;
-
-	return ret;
+	drv_array_append(drv->combos, &combo);
 }
 
 void drv_add_combinations(struct driver *drv, const uint32_t *formats, uint32_t num_formats,
diff --git a/helpers.h b/helpers.h
index 4c649c2..4f68c3b 100644
--- a/helpers.h
+++ b/helpers.h
@@ -25,9 +25,8 @@
 uintptr_t drv_get_reference_count(struct driver *drv, struct bo *bo, size_t plane);
 void drv_increment_reference_count(struct driver *drv, struct bo *bo, size_t plane);
 void drv_decrement_reference_count(struct driver *drv, struct bo *bo, size_t plane);
-uint32_t drv_log_base2(uint32_t value);
-int drv_add_combination(struct driver *drv, uint32_t format, struct format_metadata *metadata,
-			uint64_t usage);
+void drv_add_combination(struct driver *drv, uint32_t format, struct format_metadata *metadata,
+			 uint64_t usage);
 void drv_add_combinations(struct driver *drv, const uint32_t *formats, uint32_t num_formats,
 			  struct format_metadata *metadata, uint64_t usage);
 void drv_modify_combination(struct driver *drv, uint32_t format, struct format_metadata *metadata,
diff --git a/i915.c b/i915.c
index a88db6a..43e72c2 100644
--- a/i915.c
+++ b/i915.c
@@ -23,10 +23,10 @@
 #define I915_CACHELINE_MASK (I915_CACHELINE_SIZE - 1)
 
 static const uint32_t render_target_formats[] = { DRM_FORMAT_ABGR8888,    DRM_FORMAT_ARGB1555,
-						  DRM_FORMAT_ARGB8888,    DRM_FORMAT_BGR888,
-						  DRM_FORMAT_RGB565,      DRM_FORMAT_XBGR2101010,
-						  DRM_FORMAT_XBGR8888,    DRM_FORMAT_XRGB1555,
-						  DRM_FORMAT_XRGB2101010, DRM_FORMAT_XRGB8888 };
+						  DRM_FORMAT_ARGB8888,    DRM_FORMAT_RGB565,
+						  DRM_FORMAT_XBGR2101010, DRM_FORMAT_XBGR8888,
+						  DRM_FORMAT_XRGB1555,    DRM_FORMAT_XRGB2101010,
+						  DRM_FORMAT_XRGB8888 };
 
 static const uint32_t tileable_texture_source_formats[] = { DRM_FORMAT_GR88, DRM_FORMAT_R8,
 							    DRM_FORMAT_UYVY, DRM_FORMAT_YUYV };
@@ -137,6 +137,9 @@
 			     ARRAY_SIZE(tileable_texture_source_formats), &metadata,
 			     texture_use_flags);
 
+	/* Android CTS tests require this. */
+	drv_add_combination(drv, DRM_FORMAT_BGR888, &metadata, BO_USE_SW_MASK);
+
 	drv_modify_combination(drv, DRM_FORMAT_XRGB8888, &metadata, BO_USE_CURSOR | BO_USE_SCANOUT);
 	drv_modify_combination(drv, DRM_FORMAT_ARGB8888, &metadata, BO_USE_CURSOR | BO_USE_SCANOUT);
 
@@ -154,11 +157,13 @@
 	render_use_flags &= ~BO_USE_SW_WRITE_OFTEN;
 	render_use_flags &= ~BO_USE_SW_READ_OFTEN;
 	render_use_flags &= ~BO_USE_LINEAR;
+	render_use_flags &= ~BO_USE_PROTECTED;
 
 	texture_use_flags &= ~BO_USE_RENDERSCRIPT;
 	texture_use_flags &= ~BO_USE_SW_WRITE_OFTEN;
 	texture_use_flags &= ~BO_USE_SW_READ_OFTEN;
 	texture_use_flags &= ~BO_USE_LINEAR;
+	texture_use_flags &= ~BO_USE_PROTECTED;
 
 	metadata.tiling = I915_TILING_X;
 	metadata.priority = 2;
@@ -183,9 +188,8 @@
 			     texture_use_flags);
 
 	/* Support y-tiled NV12 for libva */
-	const uint32_t nv12_format = DRM_FORMAT_NV12;
-	drv_add_combinations(drv, &nv12_format, 1, &metadata,
-			     BO_USE_TEXTURE | BO_USE_HW_VIDEO_DECODER);
+	drv_add_combination(drv, DRM_FORMAT_NV12, &metadata,
+			    BO_USE_TEXTURE | BO_USE_HW_VIDEO_DECODER);
 
 	kms_items = drv_query_kms(drv);
 	if (!kms_items)
@@ -469,7 +473,17 @@
 		struct drm_i915_gem_mmap gem_map;
 		memset(&gem_map, 0, sizeof(gem_map));
 
-		if ((bo->use_flags & BO_USE_SCANOUT) && !(bo->use_flags & BO_USE_RENDERSCRIPT))
+		/* TODO(b/118799155): We don't seem to have a good way to
+		 * detect the use cases for which WC mapping is really needed.
+		 * The current heuristic seems overly coarse and may be slowing
+		 * down some other use cases unnecessarily.
+		 *
+		 * For now, care must be taken not to use WC mappings for
+		 * Renderscript and camera use cases, as they're
+		 * performance-sensitive. */
+		if ((bo->use_flags & BO_USE_SCANOUT) &&
+		    !(bo->use_flags &
+		      (BO_USE_RENDERSCRIPT | BO_USE_CAMERA_READ | BO_USE_CAMERA_WRITE)))
 			gem_map.flags = I915_MMAP_WC;
 
 		gem_map.handle = bo->handles[0].u32;
diff --git a/mediatek.c b/mediatek.c
index 64c410f..59a0fac 100644
--- a/mediatek.c
+++ b/mediatek.c
@@ -7,9 +7,12 @@
 #ifdef DRV_MEDIATEK
 
 // clang-format off
+#include <fcntl.h>
+#include <poll.h>
 #include <stdio.h>
 #include <string.h>
 #include <sys/mman.h>
+#include <unistd.h>
 #include <xf86drm.h>
 #include <mediatek_drm.h>
 // clang-format on
@@ -18,26 +21,41 @@
 #include "helpers.h"
 #include "util.h"
 
+#define TILE_TYPE_LINEAR 0
+
 struct mediatek_private_map_data {
 	void *cached_addr;
 	void *gem_addr;
+	int prime_fd;
 };
 
 static const uint32_t render_target_formats[] = { DRM_FORMAT_ABGR8888, DRM_FORMAT_ARGB8888,
-						  DRM_FORMAT_BGR888,   DRM_FORMAT_RGB565,
-						  DRM_FORMAT_XBGR8888, DRM_FORMAT_XRGB8888 };
+						  DRM_FORMAT_RGB565, DRM_FORMAT_XBGR8888,
+						  DRM_FORMAT_XRGB8888 };
 
 static const uint32_t texture_source_formats[] = { DRM_FORMAT_R8, DRM_FORMAT_YVU420,
 						   DRM_FORMAT_YVU420_ANDROID };
 
 static int mediatek_init(struct driver *drv)
 {
+	struct format_metadata metadata;
+
 	drv_add_combinations(drv, render_target_formats, ARRAY_SIZE(render_target_formats),
 			     &LINEAR_METADATA, BO_USE_RENDER_MASK);
 
 	drv_add_combinations(drv, texture_source_formats, ARRAY_SIZE(texture_source_formats),
 			     &LINEAR_METADATA, BO_USE_TEXTURE_MASK);
 
+	/* Android CTS tests require this. */
+	drv_add_combination(drv, DRM_FORMAT_BGR888, &LINEAR_METADATA, BO_USE_SW_MASK);
+
+	/* Support BO_USE_HW_VIDEO_DECODER for protected content minigbm allocations. */
+	metadata.tiling = TILE_TYPE_LINEAR;
+	metadata.priority = 1;
+	metadata.modifier = DRM_FORMAT_MOD_LINEAR;
+	drv_modify_combination(drv, DRM_FORMAT_YVU420, &metadata, BO_USE_HW_VIDEO_DECODER);
+	drv_modify_combination(drv, DRM_FORMAT_YVU420_ANDROID, &metadata, BO_USE_HW_VIDEO_DECODER);
+
 	return drv_modify_linear_combinations(drv);
 }
 
@@ -74,7 +92,7 @@
 
 static void *mediatek_bo_map(struct bo *bo, struct vma *vma, size_t plane, uint32_t map_flags)
 {
-	int ret;
+	int ret, prime_fd;
 	struct drm_mtk_gem_map_off gem_map;
 	struct mediatek_private_map_data *priv;
 
@@ -87,16 +105,24 @@
 		return MAP_FAILED;
 	}
 
+	ret = drmPrimeHandleToFD(bo->drv->fd, gem_map.handle, DRM_CLOEXEC, &prime_fd);
+	if (ret) {
+		drv_log("Failed to get a prime fd\n");
+		return MAP_FAILED;
+	}
+
 	void *addr = mmap(0, bo->total_size, drv_get_prot(map_flags), MAP_SHARED, bo->drv->fd,
 			  gem_map.offset);
 
 	vma->length = bo->total_size;
 
+	priv = calloc(1, sizeof(*priv));
+	priv->prime_fd = prime_fd;
+	vma->priv = priv;
+
 	if (bo->use_flags & BO_USE_RENDERSCRIPT) {
-		priv = calloc(1, sizeof(*priv));
 		priv->cached_addr = calloc(1, bo->total_size);
 		priv->gem_addr = addr;
-		vma->priv = priv;
 		addr = priv->cached_addr;
 	}
 
@@ -107,8 +133,13 @@
 {
 	if (vma->priv) {
 		struct mediatek_private_map_data *priv = vma->priv;
-		vma->addr = priv->gem_addr;
-		free(priv->cached_addr);
+
+		if (priv->cached_addr) {
+			vma->addr = priv->gem_addr;
+			free(priv->cached_addr);
+		}
+
+		close(priv->prime_fd);
 		free(priv);
 		vma->priv = NULL;
 	}
@@ -118,9 +149,25 @@
 
 static int mediatek_bo_invalidate(struct bo *bo, struct mapping *mapping)
 {
-	if (mapping->vma->priv) {
-		struct mediatek_private_map_data *priv = mapping->vma->priv;
-		memcpy(priv->cached_addr, priv->gem_addr, bo->total_size);
+	struct mediatek_private_map_data *priv = mapping->vma->priv;
+
+	if (priv) {
+		struct pollfd fds = {
+			.fd = priv->prime_fd,
+		};
+
+		if (mapping->vma->map_flags & BO_MAP_WRITE)
+			fds.events |= POLLOUT;
+
+		if (mapping->vma->map_flags & BO_MAP_READ)
+			fds.events |= POLLIN;
+
+		poll(&fds, 1, -1);
+		if (fds.revents != fds.events)
+			drv_log("poll prime_fd failed\n");
+
+		if (priv->cached_addr)
+			memcpy(priv->cached_addr, priv->gem_addr, bo->total_size);
 	}
 
 	return 0;
@@ -129,7 +176,7 @@
 static int mediatek_bo_flush(struct bo *bo, struct mapping *mapping)
 {
 	struct mediatek_private_map_data *priv = mapping->vma->priv;
-	if (priv && (mapping->vma->map_flags & BO_MAP_WRITE))
+	if (priv && priv->cached_addr && (mapping->vma->map_flags & BO_MAP_WRITE))
 		memcpy(priv->gem_addr, priv->cached_addr, bo->total_size);
 
 	return 0;
diff --git a/msm.c b/msm.c
index 420abfd..085cfb0 100644
--- a/msm.c
+++ b/msm.c
@@ -6,45 +6,274 @@
 
 #ifdef DRV_MSM
 
+#include <assert.h>
+#include <drm_fourcc.h>
+#include <errno.h>
+#include <msm_drm.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <xf86drm.h>
+
 #include "drv_priv.h"
 #include "helpers.h"
 #include "util.h"
 
-#define MESA_LLVMPIPE_TILE_ORDER 6
-#define MESA_LLVMPIPE_TILE_SIZE (1 << MESA_LLVMPIPE_TILE_ORDER)
+/* Alignment values are based on SDM845 Gfx IP */
+#define DEFAULT_ALIGNMENT 64
+#define BUFFER_SIZE_ALIGN 4096
 
-static const uint32_t render_target_formats[] = { DRM_FORMAT_ARGB8888, DRM_FORMAT_BGR888,
+#define VENUS_STRIDE_ALIGN 128
+#define VENUS_SCANLINE_ALIGN 16
+#define NV12_LINEAR_PADDING (12 * 1024)
+#define NV12_UBWC_PADDING(y_stride) (MAX(16 * 1024, y_stride * 48))
+#define MACROTILE_WIDTH_ALIGN 64
+#define MACROTILE_HEIGHT_ALIGN 16
+#define PLANE_SIZE_ALIGN 4096
+
+#define MSM_UBWC_TILING 1
+
+static const uint32_t render_target_formats[] = { DRM_FORMAT_ABGR8888, DRM_FORMAT_ARGB8888,
+						  DRM_FORMAT_RGB565, DRM_FORMAT_XBGR8888,
 						  DRM_FORMAT_XRGB8888 };
 
+static const uint32_t texture_source_formats[] = { DRM_FORMAT_NV12, DRM_FORMAT_R8,
+						   DRM_FORMAT_YVU420, DRM_FORMAT_YVU420_ANDROID };
+
+/*
+ * Each macrotile consists of m x n (mostly 4 x 4) tiles.
+ * The pixel data pitch/stride is aligned to the macrotile width.
+ * The pixel data height is aligned to the macrotile height.
+ * The entire pixel data buffer is aligned to 4 KiB (4096 bytes).
+ */
+static uint32_t get_ubwc_meta_size(uint32_t width, uint32_t height, uint32_t tile_width,
+				   uint32_t tile_height)
+{
+	uint32_t macrotile_width, macrotile_height;
+
+	macrotile_width = DIV_ROUND_UP(width, tile_width);
+	macrotile_height = DIV_ROUND_UP(height, tile_height);
+
+	// Align meta buffer width to 64 blocks
+	macrotile_width = ALIGN(macrotile_width, MACROTILE_WIDTH_ALIGN);
+
+	// Align meta buffer height to 16 blocks
+	macrotile_height = ALIGN(macrotile_height, MACROTILE_HEIGHT_ALIGN);
+
+	return ALIGN(macrotile_width * macrotile_height, PLANE_SIZE_ALIGN);
+}
+
+static void msm_calculate_layout(struct bo *bo)
+{
+	uint32_t width, height;
+
+	width = bo->width;
+	height = bo->height;
+
+	/* The NV12 format requires extra padding with platform-specific
+	 * alignments for the Venus driver.
+	 */
+	if (bo->format == DRM_FORMAT_NV12) {
+		uint32_t y_stride, uv_stride, y_scanline, uv_scanline, y_plane, uv_plane, size,
+		    extra_padding;
+
+		y_stride = ALIGN(width, VENUS_STRIDE_ALIGN);
+		uv_stride = ALIGN(width, VENUS_STRIDE_ALIGN);
+		y_scanline = ALIGN(height, VENUS_SCANLINE_ALIGN * 2);
+		uv_scanline = ALIGN(DIV_ROUND_UP(height, 2), VENUS_SCANLINE_ALIGN);
+		y_plane = y_stride * y_scanline;
+		uv_plane = uv_stride * uv_scanline;
+
+		if (bo->tiling == MSM_UBWC_TILING) {
+			y_plane += get_ubwc_meta_size(width, height, 32, 8);
+			uv_plane += get_ubwc_meta_size(width >> 1, height >> 1, 16, 8);
+			extra_padding = NV12_UBWC_PADDING(y_stride);
+		} else {
+			extra_padding = NV12_LINEAR_PADDING;
+		}
+
+		bo->strides[0] = y_stride;
+		bo->sizes[0] = y_plane;
+		bo->offsets[1] = y_plane;
+		bo->strides[1] = uv_stride;
+		size = y_plane + uv_plane + extra_padding;
+		bo->total_size = ALIGN(size, BUFFER_SIZE_ALIGN);
+		bo->sizes[1] = bo->total_size - bo->sizes[0];
+	} else {
+		uint32_t stride, alignw, alignh;
+
+		alignw = ALIGN(width, DEFAULT_ALIGNMENT);
+		/* HAL_PIXEL_FORMAT_YV12 requires that the buffer's height not be aligned. */
+		if (bo->format == DRM_FORMAT_YVU420_ANDROID) {
+			alignh = height;
+		} else {
+			alignh = ALIGN(height, DEFAULT_ALIGNMENT);
+		}
+
+		stride = drv_stride_from_format(bo->format, alignw, 0);
+
+		/* Calculate size and assign stride, size, offset to each plane based on format */
+		drv_bo_from_format(bo, stride, alignh, bo->format);
+
+		/* For all RGB UBWC formats */
+		if (bo->tiling == MSM_UBWC_TILING) {
+			bo->sizes[0] += get_ubwc_meta_size(width, height, 16, 4);
+			bo->total_size = bo->sizes[0];
+			assert(IS_ALIGNED(bo->total_size, BUFFER_SIZE_ALIGN));
+		}
+	}
+}
+
+static bool is_ubwc_fmt(uint32_t format)
+{
+	switch (format) {
+	case DRM_FORMAT_XBGR8888:
+	case DRM_FORMAT_ABGR8888:
+	case DRM_FORMAT_NV12:
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+static void msm_add_ubwc_combinations(struct driver *drv, const uint32_t *formats,
+				      uint32_t num_formats, struct format_metadata *metadata,
+				      uint64_t use_flags)
+{
+	for (uint32_t i = 0; i < num_formats; i++) {
+		if (is_ubwc_fmt(formats[i])) {
+			struct combination combo = { .format = formats[i],
+						     .metadata = *metadata,
+						     .use_flags = use_flags };
+			drv_array_append(drv->combos, &combo);
+		}
+	}
+}
+
 static int msm_init(struct driver *drv)
 {
-	drv_add_combinations(drv, render_target_formats, ARRAY_SIZE(render_target_formats),
-			     &LINEAR_METADATA, BO_USE_RENDER_MASK);
+	struct format_metadata metadata;
+	uint64_t render_use_flags = BO_USE_RENDER_MASK;
+	uint64_t texture_use_flags = BO_USE_TEXTURE_MASK | BO_USE_HW_VIDEO_DECODER;
+	uint64_t sw_flags = (BO_USE_RENDERSCRIPT | BO_USE_SW_WRITE_OFTEN | BO_USE_SW_READ_OFTEN |
+			     BO_USE_LINEAR | BO_USE_PROTECTED);
 
-	return drv_modify_linear_combinations(drv);
+	drv_add_combinations(drv, render_target_formats, ARRAY_SIZE(render_target_formats),
+			     &LINEAR_METADATA, render_use_flags);
+
+	drv_add_combinations(drv, texture_source_formats, ARRAY_SIZE(texture_source_formats),
+			     &LINEAR_METADATA, texture_use_flags);
+
+	/* Android CTS tests require this. */
+	drv_add_combination(drv, DRM_FORMAT_BGR888, &LINEAR_METADATA, BO_USE_SW_MASK);
+
+	drv_modify_linear_combinations(drv);
+
+	metadata.tiling = MSM_UBWC_TILING;
+	metadata.priority = 2;
+	metadata.modifier = DRM_FORMAT_MOD_QCOM_COMPRESSED;
+
+	render_use_flags &= ~sw_flags;
+	texture_use_flags &= ~sw_flags;
+
+	msm_add_ubwc_combinations(drv, render_target_formats, ARRAY_SIZE(render_target_formats),
+				  &metadata, render_use_flags);
+
+	msm_add_ubwc_combinations(drv, texture_source_formats, ARRAY_SIZE(texture_source_formats),
+				  &metadata, texture_use_flags);
+
+	return 0;
 }
 
+static int msm_bo_create_for_modifier(struct bo *bo, uint32_t width, uint32_t height,
+				      uint32_t format, const uint64_t modifier)
+{
+	struct drm_msm_gem_new req;
+	int ret;
+	size_t i;
+
+	bo->tiling = (modifier == DRM_FORMAT_MOD_QCOM_COMPRESSED) ? MSM_UBWC_TILING : 0;
+
+	msm_calculate_layout(bo);
+
+	memset(&req, 0, sizeof(req));
+	req.flags = MSM_BO_WC | MSM_BO_SCANOUT;
+	req.size = bo->total_size;
+
+	ret = drmIoctl(bo->drv->fd, DRM_IOCTL_MSM_GEM_NEW, &req);
+	if (ret) {
+		drv_log("DRM_IOCTL_MSM_GEM_NEW failed with %s\n", strerror(errno));
+		return ret;
+	}
+
+	/*
+	 * Though we use only one plane, we need to set the handle for
+	 * all planes to pass kernel checks.
+	 */
+	for (i = 0; i < bo->num_planes; i++) {
+		bo->handles[i].u32 = req.handle;
+		bo->format_modifiers[i] = modifier;
+	}
+
+	return 0;
+}
+
+static int msm_bo_create_with_modifiers(struct bo *bo, uint32_t width, uint32_t height,
+					uint32_t format, const uint64_t *modifiers, uint32_t count)
+{
+	static const uint64_t modifier_order[] = {
+		DRM_FORMAT_MOD_QCOM_COMPRESSED,
+		DRM_FORMAT_MOD_LINEAR,
+	};
+
+	uint64_t modifier =
+	    drv_pick_modifier(modifiers, count, modifier_order, ARRAY_SIZE(modifier_order));
+
+	return msm_bo_create_for_modifier(bo, width, height, format, modifier);
+}
+
+/* msm_bo_create will create linear buffers for now. NOTE(review): the modifier is taken from the matched combination -- confirm it is always linear. */
 static int msm_bo_create(struct bo *bo, uint32_t width, uint32_t height, uint32_t format,
 			 uint64_t flags)
 {
-	width = ALIGN(width, MESA_LLVMPIPE_TILE_SIZE);
-	height = ALIGN(height, MESA_LLVMPIPE_TILE_SIZE);
+	struct combination *combo = drv_get_combination(bo->drv, format, flags);
 
-	/* HAL_PIXEL_FORMAT_YV12 requires that the buffer's height not be aligned. */
-	if (bo->format == DRM_FORMAT_YVU420_ANDROID)
-		height = bo->height;
+	if (!combo) {
+		drv_log("invalid format = %d, flags = %llx combination\n", format, flags);
+		return -EINVAL;
+	}
 
-	return drv_dumb_bo_create(bo, width, height, format, flags);
+	return msm_bo_create_for_modifier(bo, width, height, format, combo->metadata.modifier);
+}
+
+static void *msm_bo_map(struct bo *bo, struct vma *vma, size_t plane, uint32_t map_flags)
+{
+	int ret;
+	struct drm_msm_gem_info req;
+
+	memset(&req, 0, sizeof(req));
+	req.handle = bo->handles[0].u32;
+
+	ret = drmIoctl(bo->drv->fd, DRM_IOCTL_MSM_GEM_INFO, &req);
+	if (ret) {
+		drv_log("DRM_IOCLT_MSM_GEM_INFO failed with %s\n", strerror(errno));
+		return MAP_FAILED;
+	}
+	vma->length = bo->total_size;
+
+	return mmap(0, bo->total_size, drv_get_prot(map_flags), MAP_SHARED, bo->drv->fd,
+		    req.offset);
 }
 
 const struct backend backend_msm = {
 	.name = "msm",
 	.init = msm_init,
 	.bo_create = msm_bo_create,
-	.bo_destroy = drv_dumb_bo_destroy,
+	.bo_create_with_modifiers = msm_bo_create_with_modifiers,
+	.bo_destroy = drv_gem_bo_destroy,
 	.bo_import = drv_prime_bo_import,
-	.bo_map = drv_dumb_bo_map,
+	.bo_map = msm_bo_map,
 	.bo_unmap = drv_bo_munmap,
 };
-
 #endif /* DRV_MSM */
diff --git a/rockchip.c b/rockchip.c
index ccc0335..177f9c7 100644
--- a/rockchip.c
+++ b/rockchip.c
@@ -127,7 +127,7 @@
 
 	/* Camera ISP supports only NV12 output. */
 	drv_modify_combination(drv, DRM_FORMAT_NV12, &metadata,
-			       BO_USE_CAMERA_READ | BO_USE_CAMERA_WRITE);
+			       BO_USE_CAMERA_READ | BO_USE_CAMERA_WRITE | BO_USE_HW_VIDEO_DECODER);
 	/*
 	 * R8 format is used for Android's HAL_PIXEL_FORMAT_BLOB and is used for JPEG snapshots
 	 * from camera.
diff --git a/vgem.c b/vgem.c
index 5380b78..14691d1 100644
--- a/vgem.c
+++ b/vgem.c
@@ -35,10 +35,6 @@
 	width = ALIGN(width, MESA_LLVMPIPE_TILE_SIZE);
 	height = ALIGN(height, MESA_LLVMPIPE_TILE_SIZE);
 
-	/* HAL_PIXEL_FORMAT_YV12 requires that the buffer's height not be aligned. */
-	if (bo->format == DRM_FORMAT_YVU420_ANDROID)
-		height = bo->height;
-
 	return drv_dumb_bo_create(bo, width, height, format, flags);
 }
 
diff --git a/virtio_gpu.c b/virtio_gpu.c
index e5729ae..36ba8bc 100644
--- a/virtio_gpu.c
+++ b/virtio_gpu.c
@@ -26,11 +26,12 @@
 #define MESA_LLVMPIPE_TILE_SIZE (1 << MESA_LLVMPIPE_TILE_ORDER)
 
 static const uint32_t render_target_formats[] = { DRM_FORMAT_ABGR8888, DRM_FORMAT_ARGB8888,
-						  DRM_FORMAT_BGR888,   DRM_FORMAT_RGB565,
-						  DRM_FORMAT_XBGR8888, DRM_FORMAT_XRGB8888 };
+						  DRM_FORMAT_RGB565, DRM_FORMAT_XBGR8888,
+						  DRM_FORMAT_XRGB8888 };
 
 static const uint32_t dumb_texture_source_formats[] = { DRM_FORMAT_R8, DRM_FORMAT_YVU420,
-							DRM_FORMAT_YVU420_ANDROID };
+							DRM_FORMAT_YVU420_ANDROID,
+							DRM_FORMAT_NV12 };
 
 static const uint32_t texture_source_formats[] = { DRM_FORMAT_R8, DRM_FORMAT_RG88 };
 
@@ -63,12 +64,10 @@
 static int virtio_dumb_bo_create(struct bo *bo, uint32_t width, uint32_t height, uint32_t format,
 				 uint64_t use_flags)
 {
-	width = ALIGN(width, MESA_LLVMPIPE_TILE_SIZE);
-	height = ALIGN(height, MESA_LLVMPIPE_TILE_SIZE);
-
-	/* HAL_PIXEL_FORMAT_YV12 requires that the buffer's height not be aligned. */
-	if (bo->format == DRM_FORMAT_YVU420_ANDROID)
-		height = bo->height;
+	if (bo->format != DRM_FORMAT_R8) {
+		width = ALIGN(width, MESA_LLVMPIPE_TILE_SIZE);
+		height = ALIGN(height, MESA_LLVMPIPE_TILE_SIZE);
+	}
 
 	return drv_dumb_bo_create(bo, width, height, format, use_flags);
 }
@@ -187,6 +186,14 @@
 				     ARRAY_SIZE(dumb_texture_source_formats), &LINEAR_METADATA,
 				     BO_USE_TEXTURE_MASK);
 
+	/* Android CTS tests require this. */
+	drv_add_combination(drv, DRM_FORMAT_BGR888, &LINEAR_METADATA, BO_USE_SW_MASK);
+
+	drv_modify_combination(drv, DRM_FORMAT_NV12, &LINEAR_METADATA,
+			       BO_USE_CAMERA_READ | BO_USE_CAMERA_WRITE);
+	drv_modify_combination(drv, DRM_FORMAT_R8, &LINEAR_METADATA,
+			       BO_USE_CAMERA_READ | BO_USE_CAMERA_WRITE);
+
 	return drv_modify_linear_combinations(drv);
 }
 
@@ -283,6 +290,9 @@
 {
 	switch (format) {
 	case DRM_FORMAT_FLEX_IMPLEMENTATION_DEFINED:
+		/* Camera subsystem requires NV12. */
+		if (use_flags & (BO_USE_CAMERA_READ | BO_USE_CAMERA_WRITE))
+			return DRM_FORMAT_NV12;
 		/*HACK: See b/28671744 */
 		return DRM_FORMAT_XBGR8888;
 	case DRM_FORMAT_FLEX_YCbCr_420_888: