gallium: add PIPE_RESOURCE_FLAG_SINGLE_THREAD_USE to skip util_range lock

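u_upload_mgr sets it on its upload buffers, so that util_range_add can
skip the lock for them. util_range_add now takes the pipe_resource as
its first argument in order to check the flag; all callers are updated.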

The time spent in tc_transfer_flush_region decreases from 0.8% to 0.2%
in torcs on radeonsi.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
diff --git a/src/gallium/auxiliary/util/u_range.h b/src/gallium/auxiliary/util/u_range.h
index 66faa10..9a158a4 100644
--- a/src/gallium/auxiliary/util/u_range.h
+++ b/src/gallium/auxiliary/util/u_range.h
@@ -35,7 +35,7 @@
 #define U_RANGE_H
 
 #include "os/os_thread.h"
-
+#include "pipe/p_state.h"
 #include "util/u_math.h"
 #include "util/simple_mtx.h"
 
@@ -57,13 +57,19 @@
 
 /* This is like a union of two sets. */
 static inline void
-util_range_add(struct util_range *range, unsigned start, unsigned end)
+util_range_add(struct pipe_resource *resource, struct util_range *range,
+               unsigned start, unsigned end)
 {
    if (start < range->start || end > range->end) {
-      simple_mtx_lock(&range->write_mutex);
-      range->start = MIN2(start, range->start);
-      range->end = MAX2(end, range->end);
-      simple_mtx_unlock(&range->write_mutex);
+      if (resource->flags & PIPE_RESOURCE_FLAG_SINGLE_THREAD_USE) { /* flag set: grow the range without taking write_mutex */
+         range->start = MIN2(start, range->start);
+         range->end = MAX2(end, range->end);
+      } else { /* flag clear: grow the range while holding write_mutex */
+         simple_mtx_lock(&range->write_mutex);
+         range->start = MIN2(start, range->start);
+         range->end = MAX2(end, range->end);
+         simple_mtx_unlock(&range->write_mutex);
+      }
    }
 }
 
diff --git a/src/gallium/auxiliary/util/u_threaded_context.c b/src/gallium/auxiliary/util/u_threaded_context.c
index 31aa18e3..b50b2c0 100644
--- a/src/gallium/auxiliary/util/u_threaded_context.c
+++ b/src/gallium/auxiliary/util/u_threaded_context.c
@@ -879,7 +879,8 @@
             struct threaded_resource *tres =
                threaded_resource(images[i].resource);
 
-            util_range_add(&tres->valid_buffer_range, images[i].u.buf.offset,
+            util_range_add(&tres->b, &tres->valid_buffer_range,
+                           images[i].u.buf.offset,
                            images[i].u.buf.offset + images[i].u.buf.size);
          }
       }
@@ -945,7 +946,8 @@
          if (src->buffer) {
             struct threaded_resource *tres = threaded_resource(src->buffer);
 
-            util_range_add(&tres->valid_buffer_range, src->buffer_offset,
+            util_range_add(&tres->b, &tres->valid_buffer_range,
+                           src->buffer_offset,
                            src->buffer_offset + src->buffer_size);
          }
       }
@@ -1135,7 +1137,7 @@
    struct pipe_stream_output_target *view;
 
    tc_sync(threaded_context(_pipe));
-   util_range_add(&tres->valid_buffer_range, buffer_offset,
+   util_range_add(&tres->b, &tres->valid_buffer_range, buffer_offset,
                   buffer_offset + buffer_size);
 
    view = pipe->create_stream_output_target(pipe, res, buffer_offset,
@@ -1538,7 +1540,8 @@
                               ttrans->staging, 0, &src_box);
    }
 
-   util_range_add(tres->base_valid_buffer_range, box->x, box->x + box->width);
+   util_range_add(&tres->b, tres->base_valid_buffer_range,
+                  box->x, box->x + box->width);
 }
 
 static void
@@ -1658,7 +1661,7 @@
       return;
    }
 
-   util_range_add(&tres->valid_buffer_range, offset, offset + size);
+   util_range_add(&tres->b, &tres->valid_buffer_range, offset, offset + size);
 
    /* The upload is small. Enqueue it. */
    struct tc_buffer_subdata *p =
@@ -2185,7 +2188,8 @@
    p->src_box = *src_box;
 
    if (dst->target == PIPE_BUFFER)
-      util_range_add(&tdst->valid_buffer_range, dstx, dstx + src_box->width);
+      util_range_add(&tdst->b, &tdst->valid_buffer_range,
+                     dstx, dstx + src_box->width);
 }
 
 static void
@@ -2401,7 +2405,7 @@
    memcpy(p->clear_value, clear_value, clear_value_size);
    p->clear_value_size = clear_value_size;
 
-   util_range_add(&tres->valid_buffer_range, offset, offset + size);
+   util_range_add(&tres->b, &tres->valid_buffer_range, offset, offset + size);
 }
 
 struct tc_clear_texture {
diff --git a/src/gallium/auxiliary/util/u_upload_mgr.c b/src/gallium/auxiliary/util/u_upload_mgr.c
index 73f6cae..4ac4685 100644
--- a/src/gallium/auxiliary/util/u_upload_mgr.c
+++ b/src/gallium/auxiliary/util/u_upload_mgr.c
@@ -202,7 +202,7 @@
    buffer.format = PIPE_FORMAT_R8_UNORM; /* want TYPELESS or similar */
    buffer.bind = upload->bind;
    buffer.usage = upload->usage;
-   buffer.flags = upload->flags;
+   buffer.flags = upload->flags | PIPE_RESOURCE_FLAG_SINGLE_THREAD_USE;
    buffer.width0 = size;
    buffer.height0 = 1;
    buffer.depth0 = 1;
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c
index fcc8fc6..d52f944 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.c
+++ b/src/gallium/drivers/freedreno/freedreno_resource.c
@@ -419,7 +419,7 @@
 	struct fd_resource *rsc = fd_resource(ptrans->resource);
 
 	if (ptrans->resource->target == PIPE_BUFFER)
-		util_range_add(&rsc->valid_buffer_range,
+		util_range_add(&rsc->base, &rsc->valid_buffer_range,
 					   ptrans->box.x + box->x,
 					   ptrans->box.x + box->x + box->width);
 }
@@ -489,7 +489,7 @@
 		fd_bo_cpu_fini(rsc->bo);
 	}
 
-	util_range_add(&rsc->valid_buffer_range,
+	util_range_add(&rsc->base, &rsc->valid_buffer_range,
 				   ptrans->box.x,
 				   ptrans->box.x + ptrans->box.width);
 
diff --git a/src/gallium/drivers/freedreno/freedreno_state.c b/src/gallium/drivers/freedreno/freedreno_state.c
index 3133d27..4f31548 100644
--- a/src/gallium/drivers/freedreno/freedreno_state.c
+++ b/src/gallium/drivers/freedreno/freedreno_state.c
@@ -472,7 +472,7 @@
 	target->buffer_size = buffer_size;
 
 	assert(rsc->base.target == PIPE_BUFFER);
-	util_range_add(&rsc->valid_buffer_range,
+	util_range_add(&rsc->base, &rsc->valid_buffer_range,
 		buffer_offset, buffer_offset + buffer_size);
 
 	return target;
diff --git a/src/gallium/drivers/iris/iris_blit.c b/src/gallium/drivers/iris/iris_blit.c
index 1246abe..8dfc9cc 100644
--- a/src/gallium/drivers/iris/iris_blit.c
+++ b/src/gallium/drivers/iris/iris_blit.c
@@ -453,7 +453,7 @@
       tex_cache_flush_hack(batch, src_fmt.fmt, src_res->surf.format);
 
    if (dst_res->base.target == PIPE_BUFFER)
-      util_range_add(&dst_res->valid_buffer_range, dst_x0, dst_x1);
+      util_range_add(&dst_res->base, &dst_res->valid_buffer_range, dst_x0, dst_x1);
 
    struct blorp_batch blorp_batch;
    blorp_batch_init(&ice->blorp, &blorp_batch, batch, blorp_flags);
@@ -577,7 +577,7 @@
       tex_cache_flush_hack(batch, ISL_FORMAT_UNSUPPORTED, src_res->surf.format);
 
    if (dst->target == PIPE_BUFFER)
-      util_range_add(&dst_res->valid_buffer_range, dstx, dstx + src_box->width);
+      util_range_add(&dst_res->base, &dst_res->valid_buffer_range, dstx, dstx + src_box->width);
 
    if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
       struct blorp_address src_addr = {
diff --git a/src/gallium/drivers/iris/iris_clear.c b/src/gallium/drivers/iris/iris_clear.c
index ff78352..b71d4f2 100644
--- a/src/gallium/drivers/iris/iris_clear.c
+++ b/src/gallium/drivers/iris/iris_clear.c
@@ -351,7 +351,7 @@
    }
 
    if (p_res->target == PIPE_BUFFER)
-      util_range_add(&res->valid_buffer_range, box->x, box->x + box->width);
+      util_range_add(&res->base, &res->valid_buffer_range, box->x, box->x + box->width);
 
    iris_batch_maybe_flush(batch, 1500);
 
diff --git a/src/gallium/drivers/iris/iris_resource.c b/src/gallium/drivers/iris/iris_resource.c
index b2f91eb..cfdd43c 100644
--- a/src/gallium/drivers/iris/iris_resource.c
+++ b/src/gallium/drivers/iris/iris_resource.c
@@ -920,7 +920,7 @@
       return NULL;
    }
 
-   util_range_add(&res->valid_buffer_range, 0, templ->width0);
+   util_range_add(&res->base, &res->valid_buffer_range, 0, templ->width0);
 
    return &res->base;
 }
@@ -1789,7 +1789,7 @@
                             box->x + box->width);
 
    if (usage & PIPE_TRANSFER_WRITE)
-      util_range_add(&res->valid_buffer_range, box->x, box->x + box->width);
+      util_range_add(&res->base, &res->valid_buffer_range, box->x, box->x + box->width);
 
    /* Avoid using GPU copies for persistent/coherent buffers, as the idea
     * there is to access them simultaneously on the CPU & GPU.  This also
@@ -1874,7 +1874,7 @@
       if (map->dest_had_defined_contents)
          history_flush |= iris_flush_bits_for_history(res);
 
-      util_range_add(&res->valid_buffer_range, box->x, box->x + box->width);
+      util_range_add(&res->base, &res->valid_buffer_range, box->x, box->x + box->width);
    }
 
    if (history_flush & ~PIPE_CONTROL_CS_STALL) {
diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c
index ed7d600..ab1578f 100644
--- a/src/gallium/drivers/iris/iris_state.c
+++ b/src/gallium/drivers/iris/iris_state.c
@@ -2481,7 +2481,7 @@
                                       &image_params[start_slot + i],
                                       &res->surf, &view);
          } else {
-            util_range_add(&res->valid_buffer_range, img->u.buf.offset,
+            util_range_add(&res->base, &res->valid_buffer_range, img->u.buf.offset,
                            img->u.buf.offset + img->u.buf.size);
 
             fill_buffer_surface_state(&screen->isl_dev, res, map,
@@ -3010,7 +3010,7 @@
          res->bind_history |= PIPE_BIND_SHADER_BUFFER;
          res->bind_stages |= 1 << stage;
 
-         util_range_add(&res->valid_buffer_range, ssbo->buffer_offset,
+         util_range_add(&res->base, &res->valid_buffer_range, ssbo->buffer_offset,
                         ssbo->buffer_offset + ssbo->buffer_size);
       } else {
          pipe_resource_reference(&shs->ssbo[start_slot + i].buffer, NULL);
@@ -3254,7 +3254,7 @@
    cso->base.buffer_size = buffer_size;
    cso->base.context = ctx;
 
-   util_range_add(&res->valid_buffer_range, buffer_offset,
+   util_range_add(&res->base, &res->valid_buffer_range, buffer_offset,
                   buffer_offset + buffer_size);
 
    upload_state(ctx->stream_uploader, &cso->offset, sizeof(uint32_t), 4);
diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c b/src/gallium/drivers/nouveau/nouveau_buffer.c
index 97305d9..42f68fa 100644
--- a/src/gallium/drivers/nouveau/nouveau_buffer.c
+++ b/src/gallium/drivers/nouveau/nouveau_buffer.c
@@ -515,7 +515,7 @@
    if (tx->map)
       nouveau_transfer_write(nouveau_context(pipe), tx, box->x, box->width);
 
-   util_range_add(&buf->valid_buffer_range,
+   util_range_add(&buf->base, &buf->valid_buffer_range,
                   tx->base.box.x + box->x,
                   tx->base.box.x + box->x + box->width);
 }
@@ -539,7 +539,7 @@
          if (tx->map)
             nouveau_transfer_write(nv, tx, 0, tx->base.box.width);
 
-         util_range_add(&buf->valid_buffer_range,
+         util_range_add(&buf->base, &buf->valid_buffer_range,
                         tx->base.box.x, tx->base.box.x + tx->base.box.width);
       }
 
@@ -590,7 +590,7 @@
                                 &src->base, 0, &src_box);
    }
 
-   util_range_add(&dst->valid_buffer_range, dstx, dstx + size);
+   util_range_add(&dst->base, &dst->valid_buffer_range, dstx, dstx + size);
 }
 
 
@@ -725,7 +725,7 @@
    buffer->status = NOUVEAU_BUFFER_STATUS_USER_MEMORY;
 
    util_range_init(&buffer->valid_buffer_range);
-   util_range_add(&buffer->valid_buffer_range, 0, bytes);
+   util_range_add(&buffer->base, &buffer->valid_buffer_range, 0, bytes);
 
    return &buffer->base;
 }
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c
index a4163aa..6488c71 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c
@@ -1148,7 +1148,7 @@
    pipe_reference_init(&targ->pipe.reference, 1);
 
    assert(buf->base.target == PIPE_BUFFER);
-   util_range_add(&buf->valid_buffer_range, offset, offset + size);
+   util_range_add(&buf->base, &buf->valid_buffer_range, offset, offset + size);
 
    return &targ->pipe;
 }
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
index de840eb..84e537c 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
@@ -724,7 +724,7 @@
       return;
    }
 
-   util_range_add(&buf->valid_buffer_range, offset, offset + size);
+   util_range_add(&buf->base, &buf->valid_buffer_range, offset, offset + size);
 
    assert(size % data_size == 0);
 
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
index 3ab2f5e..af5c6f7 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
@@ -285,7 +285,7 @@
          PUSH_DATA (push, nvc0->buffers[s][i].buffer_size);
          PUSH_DATA (push, 0);
          BCTX_REFN(nvc0->bufctx_cp, CP_BUF, res, RDWR);
-         util_range_add(&res->valid_buffer_range,
+         util_range_add(&res->base, &res->valid_buffer_range,
                         nvc0->buffers[s][i].buffer_offset,
                         nvc0->buffers[s][i].buffer_offset +
                         nvc0->buffers[s][i].buffer_size);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
index 672b3e1..af23798 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
@@ -409,7 +409,7 @@
                          result_type >= PIPE_QUERY_TYPE_I64 ? 2 : 1,
                          ready);
 
-      util_range_add(&buf->valid_buffer_range, offset,
+      util_range_add(&buf->base, &buf->valid_buffer_range, offset,
                      offset + (result_type >= PIPE_QUERY_TYPE_I64 ? 8 : 4));
 
       nvc0_resource_validate(buf, NOUVEAU_BO_WR);
@@ -508,7 +508,7 @@
    PUSH_DATAh(push, buf->address + offset);
    PUSH_DATA (push, buf->address + offset);
 
-   util_range_add(&buf->valid_buffer_range, offset,
+   util_range_add(&buf->base, &buf->valid_buffer_range, offset,
                   offset + (result_type >= PIPE_QUERY_TYPE_I64 ? 8 : 4));
 
    nvc0_resource_validate(buf, NOUVEAU_BO_WR);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
index 6fde2de..49546e5 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
@@ -1065,7 +1065,7 @@
    pipe_reference_init(&targ->pipe.reference, 1);
 
    assert(buf->base.target == PIPE_BUFFER);
-   util_range_add(&buf->valid_buffer_range, offset, offset + size);
+   util_range_add(&buf->base, &buf->valid_buffer_range, offset, offset + size);
 
    return &targ->pipe;
 }
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
index 40a42f5..85dcf80 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
@@ -644,7 +644,7 @@
             PUSH_DATA (push, nvc0->buffers[s][i].buffer_size);
             PUSH_DATA (push, 0);
             BCTX_REFN(nvc0->bufctx_3d, 3D_BUF, res, RDWR);
-            util_range_add(&res->valid_buffer_range,
+            util_range_add(&res->base, &res->valid_buffer_range,
                            nvc0->buffers[s][i].buffer_offset,
                            nvc0->buffers[s][i].buffer_offset +
                            nvc0->buffers[s][i].buffer_size);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
index 56f459c..7031aef 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -534,7 +534,7 @@
       return;
    }
 
-   util_range_add(&buf->valid_buffer_range, offset, offset + size);
+   util_range_add(&buf->base, &buf->valid_buffer_range, offset, offset + size);
 
    assert(size % data_size == 0);
 
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
index 8820b5a..f62e508 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
@@ -948,7 +948,7 @@
 
    assert(view->resource->target == PIPE_BUFFER);
 
-   util_range_add(&res->valid_buffer_range,
+   util_range_add(&res->base, &res->valid_buffer_range,
                   view->u.buf.offset,
                   view->u.buf.offset + view->u.buf.size);
 }
@@ -1472,7 +1472,7 @@
       res->flags = (access & 3) << 8;
       if (res->buf->base.target == PIPE_BUFFER &&
           access & PIPE_IMAGE_ACCESS_WRITE)
-         util_range_add(&res->buf->valid_buffer_range,
+         util_range_add(&res->buf->base, &res->buf->valid_buffer_range,
                         tic->pipe.u.buf.offset,
                         tic->pipe.u.buf.offset + tic->pipe.u.buf.size);
       list_add(&res->list, &nvc0->img_head);
diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
index 91c2671..146eeb3 100644
--- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
@@ -448,7 +448,7 @@
          PUSH_DATA (push, nvc0->buffers[s][i].buffer_size);
          PUSH_DATA (push, 0);
          BCTX_REFN(nvc0->bufctx_cp, CP_BUF, res, RDWR);
-         util_range_add(&res->valid_buffer_range,
+         util_range_add(&res->base, &res->valid_buffer_range,
                         nvc0->buffers[s][i].buffer_offset,
                         nvc0->buffers[s][i].buffer_offset +
                         nvc0->buffers[s][i].buffer_size);
diff --git a/src/gallium/drivers/panfrost/pan_resource.c b/src/gallium/drivers/panfrost/pan_resource.c
index 4908861..01ef5c5 100644
--- a/src/gallium/drivers/panfrost/pan_resource.c
+++ b/src/gallium/drivers/panfrost/pan_resource.c
@@ -680,7 +680,7 @@
         }
 
 
-        util_range_add(&prsrc->valid_buffer_range,
+        util_range_add(&prsrc->base, &prsrc->valid_buffer_range,
                        transfer->box.x,
                        transfer->box.x + transfer->box.width);
 
@@ -699,7 +699,7 @@
         struct panfrost_resource *rsc = pan_resource(transfer->resource);
 
         if (transfer->resource->target == PIPE_BUFFER) {
-                util_range_add(&rsc->valid_buffer_range,
+                util_range_add(&rsc->base, &rsc->valid_buffer_range,
                                transfer->box.x + box->x,
                                transfer->box.x + box->x + box->width);
         } else {
diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c
index 5e0e27b..da85538 100644
--- a/src/gallium/drivers/r600/evergreen_hw_context.c
+++ b/src/gallium/drivers/r600/evergreen_hw_context.c
@@ -43,7 +43,7 @@
 	/* Mark the buffer range of destination as valid (initialized),
 	 * so that transfer_map knows it should wait for the GPU when mapping
 	 * that range. */
-	util_range_add(&rdst->valid_buffer_range, dst_offset,
+	util_range_add(&rdst->b.b, &rdst->valid_buffer_range, dst_offset,
 		       dst_offset + size);
 
 	dst_offset += rdst->gpu_address;
@@ -93,7 +93,7 @@
 	/* Mark the buffer range of destination as valid (initialized),
 	 * so that transfer_map knows it should wait for the GPU when mapping
 	 * that range. */
-	util_range_add(&r600_resource(dst)->valid_buffer_range, offset,
+	util_range_add(dst, &r600_resource(dst)->valid_buffer_range, offset,
 		       offset + size);
 
 	offset += r600_resource(dst)->gpu_address;
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index b831561..ca2ad5e 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1308,7 +1308,7 @@
 	surf->cb_color_view = 0;
 
 	/* Set the buffer range the GPU will have access to: */
-	util_range_add(&r600_resource(pipe_buffer)->valid_buffer_range,
+	util_range_add(pipe_buffer, &r600_resource(pipe_buffer)->valid_buffer_range,
 		       0, pipe_buffer->width0);
 }
 
diff --git a/src/gallium/drivers/r600/r600_buffer_common.c b/src/gallium/drivers/r600/r600_buffer_common.c
index 04f80da..d0f44dc 100644
--- a/src/gallium/drivers/r600/r600_buffer_common.c
+++ b/src/gallium/drivers/r600/r600_buffer_common.c
@@ -498,7 +498,7 @@
 		ctx->resource_copy_region(ctx, dst, 0, box->x, 0, 0, src, 0, &dma_box);
 	}
 
-	util_range_add(&rbuffer->valid_buffer_range, box->x,
+	util_range_add(&rbuffer->b.b, &rbuffer->valid_buffer_range, box->x,
 		       box->x + box->width);
 }
 
@@ -643,8 +643,8 @@
 	rbuffer->domains = RADEON_DOMAIN_GTT;
 	rbuffer->flags = 0;
 	rbuffer->b.is_user_ptr = true;
-	util_range_add(&rbuffer->valid_buffer_range, 0, templ->width0);
-	util_range_add(&rbuffer->b.valid_buffer_range, 0, templ->width0);
+	util_range_add(&rbuffer->b.b, &rbuffer->valid_buffer_range, 0, templ->width0);
+	util_range_add(&rbuffer->b.b, &rbuffer->b.valid_buffer_range, 0, templ->width0);
 
 	/* Convert a user pointer to a buffer. */
 	rbuffer->buf = ws->buffer_from_ptr(ws, user_memory, templ->width0);
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index abf5d03..494b7ed 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -510,7 +510,7 @@
 	/* Mark the buffer range of destination as valid (initialized),
 	 * so that transfer_map knows it should wait for the GPU when mapping
 	 * that range. */
-	util_range_add(&r600_resource(dst)->valid_buffer_range, dst_offset,
+	util_range_add(dst, &r600_resource(dst)->valid_buffer_range, dst_offset,
 		       dst_offset + size);
 
 	dst_offset += r600_resource(dst)->gpu_address;
@@ -592,7 +592,7 @@
 	/* Mark the buffer range of destination as valid (initialized),
 	 * so that transfer_map knows it should wait for the GPU when mapping
 	 * that range. */
-	util_range_add(&rdst->valid_buffer_range, dst_offset,
+	util_range_add(&rdst->b.b, &rdst->valid_buffer_range, dst_offset,
 		       dst_offset + size);
 
 	size >>= 2; /* convert to dwords */
diff --git a/src/gallium/drivers/r600/r600_streamout.c b/src/gallium/drivers/r600/r600_streamout.c
index de3e767..f925c07b 100644
--- a/src/gallium/drivers/r600/r600_streamout.c
+++ b/src/gallium/drivers/r600/r600_streamout.c
@@ -65,7 +65,7 @@
 	t->b.buffer_offset = buffer_offset;
 	t->b.buffer_size = buffer_size;
 
-	util_range_add(&rbuffer->valid_buffer_range, buffer_offset,
+	util_range_add(buffer, &rbuffer->valid_buffer_range, buffer_offset,
 		       buffer_offset + buffer_size);
 	return &t->b;
 }
diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c
index 47d8afc..f45903a 100644
--- a/src/gallium/drivers/radeonsi/cik_sdma.c
+++ b/src/gallium/drivers/radeonsi/cik_sdma.c
@@ -41,7 +41,7 @@
 	/* Mark the buffer range of destination as valid (initialized),
 	 * so that transfer_map knows it should wait for the GPU when mapping
 	 * that range. */
-	util_range_add(&sdst->valid_buffer_range, dst_offset,
+	util_range_add(dst, &sdst->valid_buffer_range, dst_offset,
 		       dst_offset + size);
 
 	dst_offset += sdst->gpu_address;
diff --git a/src/gallium/drivers/radeonsi/si_buffer.c b/src/gallium/drivers/radeonsi/si_buffer.c
index 57b8aee..4fda5cb 100644
--- a/src/gallium/drivers/radeonsi/si_buffer.c
+++ b/src/gallium/drivers/radeonsi/si_buffer.c
@@ -590,7 +590,7 @@
 			       box->x, src_offset, box->width);
 	}
 
-	util_range_add(&buf->valid_buffer_range, box->x,
+	util_range_add(&buf->b.b, &buf->valid_buffer_range, box->x,
 		       box->x + box->width);
 }
 
@@ -744,8 +744,8 @@
 	buf->domains = RADEON_DOMAIN_GTT;
 	buf->flags = 0;
 	buf->b.is_user_ptr = true;
-	util_range_add(&buf->valid_buffer_range, 0, templ->width0);
-	util_range_add(&buf->b.valid_buffer_range, 0, templ->width0);
+	util_range_add(&buf->b.b, &buf->valid_buffer_range, 0, templ->width0);
+	util_range_add(&buf->b.b, &buf->b.valid_buffer_range, 0, templ->width0);
 
 	/* Convert a user pointer to a buffer. */
 	buf->buf = ws->buffer_from_ptr(ws, user_memory, templ->width0);
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index d1b44b8..974f2b2 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -222,7 +222,7 @@
 	 * so that transfer_map knows it should wait for the GPU when mapping
 	 * that range. */
 	if (sdst)
-		util_range_add(&sdst->valid_buffer_range, offset, offset + size);
+		util_range_add(dst, &sdst->valid_buffer_range, offset, offset + size);
 
 	/* Flush the caches. */
 	if (sdst && !(user_flags & SI_CPDMA_SKIP_GFX_SYNC)) {
@@ -325,7 +325,7 @@
 			/* Mark the buffer range of destination as valid (initialized),
 			 * so that transfer_map knows it should wait for the GPU when mapping
 			 * that range. */
-			util_range_add(&si_resource(dst)->valid_buffer_range, dst_offset,
+			util_range_add(dst, &si_resource(dst)->valid_buffer_range, dst_offset,
 				       dst_offset + size);
 		}
 
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index a5769df..28fe5c1 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -692,7 +692,7 @@
 	if (res->b.b.target != PIPE_BUFFER)
 		return;
 
-	util_range_add(&res->valid_buffer_range,
+	util_range_add(&res->b.b, &res->valid_buffer_range,
 		       view->u.buf.offset,
 		       view->u.buf.offset + view->u.buf.size);
 }
@@ -1395,7 +1395,7 @@
 	buffers->enabled_mask |= 1u << slot;
 	sctx->descriptors_dirty |= 1u << descriptors_idx;
 
-	util_range_add(&buf->valid_buffer_range, sbuffer->buffer_offset,
+	util_range_add(&buf->b.b, &buf->valid_buffer_range, sbuffer->buffer_offset,
 		       sbuffer->buffer_offset + sbuffer->buffer_size);
 }
 
diff --git a/src/gallium/drivers/radeonsi/si_dma.c b/src/gallium/drivers/radeonsi/si_dma.c
index 450ed82..1a5962e 100644
--- a/src/gallium/drivers/radeonsi/si_dma.c
+++ b/src/gallium/drivers/radeonsi/si_dma.c
@@ -43,7 +43,7 @@
 	/* Mark the buffer range of destination as valid (initialized),
 	 * so that transfer_map knows it should wait for the GPU when mapping
 	 * that range. */
-	util_range_add(&sdst->valid_buffer_range, dst_offset,
+	util_range_add(dst, &sdst->valid_buffer_range, dst_offset,
 		       dst_offset + size);
 
 	dst_offset += sdst->gpu_address;
diff --git a/src/gallium/drivers/radeonsi/si_dma_cs.c b/src/gallium/drivers/radeonsi/si_dma_cs.c
index 8bc5d0e..d7b7681 100644
--- a/src/gallium/drivers/radeonsi/si_dma_cs.c
+++ b/src/gallium/drivers/radeonsi/si_dma_cs.c
@@ -50,7 +50,7 @@
 	/* Mark the buffer range of destination as valid (initialized),
 	 * so that transfer_map knows it should wait for the GPU when mapping
 	 * that range. */
-	util_range_add(&dst->valid_buffer_range, offset, offset + 8);
+	util_range_add(&dst->b.b, &dst->valid_buffer_range, offset, offset + 8);
 
 	assert(va % 8 == 0);
 
@@ -83,7 +83,7 @@
 	/* Mark the buffer range of destination as valid (initialized),
 	 * so that transfer_map knows it should wait for the GPU when mapping
 	 * that range. */
-	util_range_add(&sdst->valid_buffer_range, offset, offset + size);
+	util_range_add(dst, &sdst->valid_buffer_range, offset, offset + size);
 
 	offset += sdst->gpu_address;
 
diff --git a/src/gallium/drivers/radeonsi/si_state_streamout.c b/src/gallium/drivers/radeonsi/si_state_streamout.c
index 9b1d05f..85ac4a1 100644
--- a/src/gallium/drivers/radeonsi/si_state_streamout.c
+++ b/src/gallium/drivers/radeonsi/si_state_streamout.c
@@ -65,7 +65,7 @@
 	t->b.buffer_offset = buffer_offset;
 	t->b.buffer_size = buffer_size;
 
-	util_range_add(&buf->valid_buffer_range, buffer_offset,
+	util_range_add(&buf->b.b, &buf->valid_buffer_range, buffer_offset,
 		       buffer_offset + buffer_size);
 	return &t->b;
 }
diff --git a/src/gallium/drivers/virgl/virgl_buffer.c b/src/gallium/drivers/virgl/virgl_buffer.c
index 9f999fe..ba32c29 100644
--- a/src/gallium/drivers/virgl/virgl_buffer.c
+++ b/src/gallium/drivers/virgl/virgl_buffer.c
@@ -71,7 +71,7 @@
     *
     * We'll end up flushing 25 --> 70.
     */
-   util_range_add(&trans->range, box->x, box->x + box->width);
+   util_range_add(transfer->resource, &trans->range, box->x, box->x + box->width);
 }
 
 static const struct u_resource_vtbl virgl_buffer_vtbl =
diff --git a/src/gallium/drivers/virgl/virgl_context.c b/src/gallium/drivers/virgl/virgl_context.c
index bbb5247..0554570 100644
--- a/src/gallium/drivers/virgl/virgl_context.c
+++ b/src/gallium/drivers/virgl/virgl_context.c
@@ -1116,7 +1116,7 @@
    struct virgl_resource *sres = virgl_resource(src);
 
    if (dres->u.b.target == PIPE_BUFFER)
-      util_range_add(&dres->valid_buffer_range, dstx, dstx + src_box->width);
+      util_range_add(&dres->u.b, &dres->valid_buffer_range, dstx, dstx + src_box->width);
    virgl_resource_dirty(dres, dst_level);
 
    virgl_encode_resource_copy_region(vctx, dres,
diff --git a/src/gallium/drivers/virgl/virgl_encode.c b/src/gallium/drivers/virgl/virgl_encode.c
index 096b7e9..5d2f7df 100644
--- a/src/gallium/drivers/virgl/virgl_encode.c
+++ b/src/gallium/drivers/virgl/virgl_encode.c
@@ -1213,7 +1213,7 @@
          virgl_encoder_write_dword(ctx->cbuf, buffers[i].buffer_size);
          virgl_encoder_write_res(ctx, res);
 
-         util_range_add(&res->valid_buffer_range, buffers[i].buffer_offset,
+         util_range_add(&res->u.b, &res->valid_buffer_range, buffers[i].buffer_offset,
                buffers[i].buffer_offset + buffers[i].buffer_size);
          virgl_resource_dirty(res, 0);
       } else {
@@ -1240,7 +1240,7 @@
          virgl_encoder_write_dword(ctx->cbuf, buffers[i].buffer_size);
          virgl_encoder_write_res(ctx, res);
 
-         util_range_add(&res->valid_buffer_range, buffers[i].buffer_offset,
+         util_range_add(&res->u.b, &res->valid_buffer_range, buffers[i].buffer_offset,
                buffers[i].buffer_offset + buffers[i].buffer_size);
          virgl_resource_dirty(res, 0);
       } else {
@@ -1272,7 +1272,7 @@
          virgl_encoder_write_res(ctx, res);
 
          if (res->u.b.target == PIPE_BUFFER) {
-            util_range_add(&res->valid_buffer_range, images[i].u.buf.offset,
+            util_range_add(&res->u.b, &res->valid_buffer_range, images[i].u.buf.offset,
                   images[i].u.buf.offset + images[i].u.buf.size);
          }
          virgl_resource_dirty(res, images[i].u.tex.level);
diff --git a/src/gallium/drivers/virgl/virgl_query.c b/src/gallium/drivers/virgl/virgl_query.c
index e8ef455..9d6989c 100644
--- a/src/gallium/drivers/virgl/virgl_query.c
+++ b/src/gallium/drivers/virgl/virgl_query.c
@@ -114,7 +114,7 @@
    query->result_size = (query_type == PIPE_QUERY_TIMESTAMP ||
                          query_type == PIPE_QUERY_TIME_ELAPSED) ? 8 : 4;
 
-   util_range_add(&query->buf->valid_buffer_range, 0,
+   util_range_add(&query->buf->u.b, &query->buf->valid_buffer_range, 0,
                   sizeof(struct virgl_host_query_state));
    virgl_resource_dirty(query->buf, 0);
 
diff --git a/src/gallium/drivers/virgl/virgl_resource.c b/src/gallium/drivers/virgl/virgl_resource.c
index fa0eb70..b5d82bd 100644
--- a/src/gallium/drivers/virgl/virgl_resource.c
+++ b/src/gallium/drivers/virgl/virgl_resource.c
@@ -446,7 +446,7 @@
       }
 
       if (usage & PIPE_TRANSFER_WRITE)
-          util_range_add(&vres->valid_buffer_range, box->x, box->x + box->width);
+          util_range_add(&vres->u.b, &vres->valid_buffer_range, box->x, box->x + box->width);
    }
 
    *transfer = &trans->base;
@@ -608,7 +608,7 @@
        likely(!(virgl_debug & VIRGL_DEBUG_XFER)) &&
        virgl_transfer_queue_extend_buffer(&vctx->queue,
                                           vbuf->hw_res, offset, size, data)) {
-      util_range_add(&vbuf->valid_buffer_range, offset, offset + size);
+      util_range_add(&vbuf->u.b, &vbuf->valid_buffer_range, offset, offset + size);
       return;
    }
 
diff --git a/src/gallium/drivers/virgl/virgl_streamout.c b/src/gallium/drivers/virgl/virgl_streamout.c
index edd5bc5..ba601f9 100644
--- a/src/gallium/drivers/virgl/virgl_streamout.c
+++ b/src/gallium/drivers/virgl/virgl_streamout.c
@@ -50,7 +50,7 @@
    t->handle = handle;
 
    res->bind_history |= PIPE_BIND_STREAM_OUTPUT;
-   util_range_add(&res->valid_buffer_range, buffer_offset,
+   util_range_add(&res->u.b, &res->valid_buffer_range, buffer_offset,
                   buffer_offset + buffer_size);
    virgl_resource_dirty(res, 0);
 
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index 1dc880b..9e579e4 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -494,6 +494,7 @@
 #define PIPE_RESOURCE_FLAG_MAP_COHERENT   (1 << 1)
 #define PIPE_RESOURCE_FLAG_TEXTURING_MORE_LIKELY (1 << 2)
 #define PIPE_RESOURCE_FLAG_SPARSE                (1 << 3)
+#define PIPE_RESOURCE_FLAG_SINGLE_THREAD_USE     (1 << 4)
 #define PIPE_RESOURCE_FLAG_DRV_PRIV    (1 << 8) /* driver/winsys private */
 #define PIPE_RESOURCE_FLAG_ST_PRIV     (1 << 24) /* state-tracker/winsys private */