/**********************************************************
* Copyright 2008-2015 VMware, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************/
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "util/u_bitmask.h"
#include "util/u_memory.h"
#include "svga_cmd.h"
#include "svga_context.h"
#include "svga_screen.h"
#include "svga_resource_buffer.h"
#include "svga_winsys.h"
#include "svga_debug.h"
/* FIXME: we want a public base class for all pipe structs, even if there
* isn't much in them.
*/
struct pipe_query {
int dummy;
};
struct svga_query {
struct pipe_query base;
unsigned type; /**< PIPE_QUERY_x or SVGA_QUERY_x */
SVGA3dQueryType svga_type; /**< SVGA3D_QUERYTYPE_x or unused */
unsigned id; /**< Per-context query identifier */
struct pipe_fence_handle *fence;
/** For VGPU9 (e.g. PIPE_QUERY_OCCLUSION_COUNTER / SVGA3D_QUERYTYPE_OCCLUSION) */
struct svga_winsys_buffer *hwbuf;
volatile SVGA3dQueryResult *queryResult;
/** For VGPU10 */
struct svga_winsys_gb_query *gb_query;
SVGA3dDXQueryFlags flags;
unsigned offset; /**< offset into the gb_query memory */
struct pipe_query *predicate; /**< The associated query that can be used for predication */
/** For non-GPU SVGA_QUERY_x queries */
uint64_t begin_count, end_count;
};
/** cast wrapper */
static inline struct svga_query *
svga_query(struct pipe_query *q)
{
return (struct svga_query *)q;
}
/**
* VGPU9
*/
static boolean
svga_get_query_result(struct pipe_context *pipe,
struct pipe_query *q,
boolean wait,
union pipe_query_result *result);
static enum pipe_error
define_query_vgpu9(struct svga_context *svga,
struct svga_query *sq)
{
struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
sq->hwbuf = svga_winsys_buffer_create(svga, 1,
SVGA_BUFFER_USAGE_PINNED,
sizeof *sq->queryResult);
if (!sq->hwbuf)
return PIPE_ERROR_OUT_OF_MEMORY;
sq->queryResult = (SVGA3dQueryResult *)
sws->buffer_map(sws, sq->hwbuf, PIPE_TRANSFER_WRITE);
if (!sq->queryResult) {
sws->buffer_destroy(sws, sq->hwbuf);
return PIPE_ERROR_OUT_OF_MEMORY;
}
sq->queryResult->totalSize = sizeof *sq->queryResult;
sq->queryResult->state = SVGA3D_QUERYSTATE_NEW;
/* We request the buffer to be pinned and assume it is always mapped.
* The reason is that we don't want to wait for fences when checking the
* query status.
*/
sws->buffer_unmap(sws, sq->hwbuf);
return PIPE_OK;
}
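/**
* Begin a VGPU9 query. If a result from a previous use of this query
* is still pending, wait for it first, since the result buffer is
* reused for the new query.
*/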
static enum pipe_error
begin_query_vgpu9(struct svga_context *svga, struct svga_query *sq)
{
struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
enum pipe_error ret = PIPE_OK;
if (sq->queryResult->state == SVGA3D_QUERYSTATE_PENDING) {
/* The application doesn't care about the pending query result.
* We cannot let go of the existing buffer and just get a new one,
* because its storage may be reused for other purposes and clobbered
* by the host when it determines the query result. So the only
* option here is to wait for the existing query's result -- not a
* big deal, given that no sane application would do this.
*/
uint64_t result;
svga_get_query_result(&svga->pipe, &sq->base, TRUE, (void*)&result);
assert(sq->queryResult->state != SVGA3D_QUERYSTATE_PENDING);
}
sq->queryResult->state = SVGA3D_QUERYSTATE_NEW;
sws->fence_reference(sws, &sq->fence, NULL);
ret = SVGA3D_BeginQuery(svga->swc, sq->svga_type);
if (ret != PIPE_OK) {
svga_context_flush(svga, NULL);
ret = SVGA3D_BeginQuery(svga->swc, sq->svga_type);
}
return ret;
}
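/**
* End a VGPU9 query. The result buffer is marked PENDING before the
* EndQuery command is emitted; the host later writes the result and
* final state into the buffer.
*/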
static enum pipe_error
end_query_vgpu9(struct svga_context *svga, struct svga_query *sq)
{
enum pipe_error ret = PIPE_OK;
/* Set to PENDING before sending EndQuery. */
sq->queryResult->state = SVGA3D_QUERYSTATE_PENDING;
ret = SVGA3D_EndQuery(svga->swc, sq->svga_type, sq->hwbuf);
if (ret != PIPE_OK) {
svga_context_flush(svga, NULL);
ret = SVGA3D_EndQuery(svga->swc, sq->svga_type, sq->hwbuf);
}
return ret;
}
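/**
* Get a VGPU9 query result. Emits a WaitForQuery command and creates
* a fence on first use; if 'wait' is TRUE, blocks on that fence until
* the host has updated the query state in the pinned result buffer.
*/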
static boolean
get_query_result_vgpu9(struct svga_context *svga, struct svga_query *sq,
boolean wait, uint64_t *result)
{
struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
enum pipe_error ret;
SVGA3dQueryState state;
if (!sq->fence) {
/* The query status won't be updated by the host unless
* SVGA_3D_CMD_WAIT_FOR_QUERY is emitted. Unfortunately this will cause
* a synchronous wait on the host.
*/
ret = SVGA3D_WaitForQuery(svga->swc, sq->svga_type, sq->hwbuf);
if (ret != PIPE_OK) {
svga_context_flush(svga, NULL);
ret = SVGA3D_WaitForQuery(svga->swc, sq->svga_type, sq->hwbuf);
}
assert(ret == PIPE_OK);
svga_context_flush(svga, &sq->fence);
assert(sq->fence);
}
state = sq->queryResult->state;
if (state == SVGA3D_QUERYSTATE_PENDING) {
if (!wait)
return FALSE;
sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY);
state = sq->queryResult->state;
}
assert(state == SVGA3D_QUERYSTATE_SUCCEEDED ||
state == SVGA3D_QUERYSTATE_FAILED);
*result = (uint64_t)sq->queryResult->result32;
return TRUE;
}
/**
* VGPU10
*
* There is one query mob allocated for each context to be shared by all
* query types. The mob is used to hold queries's state and result. Since
* each query result type is of different length, to ease the query allocation
* management, the mob is divided into memory blocks. Each memory block
* will hold queries of the same type. Multiple memory blocks can be allocated
* for a particular query type.
*
* Currently each memory block is of 184 bytes. We support up to 128
* memory blocks. The query memory size is arbitrary right now.
* Each occlusion query takes about 8 bytes. One memory block can accomodate
* 23 occlusion queries. 128 of those blocks can support up to 2944 occlusion
* queries. That seems reasonable for now. If we think this limit is
* not enough, we can increase the limit or try to grow the mob in runtime.
* Note, SVGA device does not impose one mob per context for queries,
* we could allocate multiple mobs for queries; however, wddm KMD does not
* currently support that.
*
* Also note that the GL guest driver does not issue any of the
* following commands: DXMoveQuery, DXBindAllQuery & DXReadbackAllQuery.
*/
#define SVGA_QUERY_MEM_BLOCK_SIZE (sizeof(SVGADXQueryResultUnion) * 2)
#define SVGA_QUERY_MEM_SIZE (128 * SVGA_QUERY_MEM_BLOCK_SIZE)
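/*
* Worked example of the layout above (using the approximate figures
* from the comment -- a 184-byte block and an 8-byte occlusion slot,
* which are assumptions, not values read from the device headers):
* slot s of block b lives at byte offset
*
*   b * SVGA_QUERY_MEM_BLOCK_SIZE + s * query_size
*
* so block 2, slot 5 of an occlusion query sits at 2 * 184 + 5 * 8 = 408.
* This matches the offset computed by allocate_query() below.
*/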
struct svga_qmem_alloc_entry
{
unsigned start_offset; /* start offset of the memory block */
unsigned block_index; /* block index of the memory block */
unsigned query_size; /* query size in this memory block */
unsigned nquery; /* number of queries allocated */
struct util_bitmask *alloc_mask; /* allocation mask */
struct svga_qmem_alloc_entry *next; /* next memory block */
};
/**
* Allocate a memory block from the query object memory
* \return -1 if out of memory, else index of the query memory block
*/
static int
allocate_query_block(struct svga_context *svga)
{
int index;
unsigned offset;
/* Find the next available query block */
index = util_bitmask_add(svga->gb_query_alloc_mask);
if (index == UTIL_BITMASK_INVALID_INDEX)
return -1;
offset = index * SVGA_QUERY_MEM_BLOCK_SIZE;
if (offset >= svga->gb_query_len) {
unsigned i;
/* All the memory blocks are allocated; let's see if there is
* an empty memory block around that can be recycled.
*/
/* Release the out-of-range index so it doesn't leak from the mask */
util_bitmask_clear(svga->gb_query_alloc_mask, index);
index = -1;
for (i = 0; i < SVGA3D_QUERYTYPE_MAX && index == -1; i++) {
struct svga_qmem_alloc_entry *alloc_entry;
struct svga_qmem_alloc_entry *prev_alloc_entry = NULL;
alloc_entry = svga->gb_query_map[i];
while (alloc_entry && index == -1) {
if (alloc_entry->nquery == 0) {
/* This memory block is empty, it can be recycled. */
if (prev_alloc_entry) {
prev_alloc_entry->next = alloc_entry->next;
} else {
svga->gb_query_map[i] = alloc_entry->next;
}
index = alloc_entry->block_index;
} else {
prev_alloc_entry = alloc_entry;
alloc_entry = alloc_entry->next;
}
}
}
}
return index;
}
/**
* Allocate a slot in the specified memory block.
* All slots in this memory block are of the same size.
*
* \return -1 if out of memory, else index of the query slot
*/
static int
allocate_query_slot(struct svga_context *svga,
struct svga_qmem_alloc_entry *alloc)
{
int index;
unsigned offset;
/* Find the next available slot */
index = util_bitmask_add(alloc->alloc_mask);
if (index == UTIL_BITMASK_INVALID_INDEX)
return -1;
offset = index * alloc->query_size;
if (offset >= SVGA_QUERY_MEM_BLOCK_SIZE)
return -1;
alloc->nquery++;
return index;
}
/**
* Deallocate the specified slot in the memory block.
* If all slots are freed up, then deallocate the memory block
* as well, so it can be reallocated for another query type.
*/
static void
deallocate_query_slot(struct svga_context *svga,
struct svga_qmem_alloc_entry *alloc,
unsigned index)
{
assert(index != UTIL_BITMASK_INVALID_INDEX);
util_bitmask_clear(alloc->alloc_mask, index);
alloc->nquery--;
/* Don't worry about deallocating the empty memory block here.
* Empty memory blocks are recycled when no more blocks can be
* allocated.
*/
}
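/**
* Allocate a query memory block and wrap it in a new allocation entry
* that tracks per-slot usage for queries of size 'len'.
*/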
static struct svga_qmem_alloc_entry *
allocate_query_block_entry(struct svga_context *svga,
unsigned len)
{
struct svga_qmem_alloc_entry *alloc_entry;
int block_index = -1;
block_index = allocate_query_block(svga);
if (block_index == -1)
return NULL;
alloc_entry = CALLOC_STRUCT(svga_qmem_alloc_entry);
if (!alloc_entry)
return NULL;
alloc_entry->block_index = block_index;
alloc_entry->start_offset = block_index * SVGA_QUERY_MEM_BLOCK_SIZE;
alloc_entry->nquery = 0;
alloc_entry->alloc_mask = util_bitmask_create();
alloc_entry->next = NULL;
alloc_entry->query_size = len;
return alloc_entry;
}
/**
* Allocate a memory slot for a query of the specified type.
* It will first search through the memory blocks that are allocated
* for the query type. If no memory slot is available, it will try
* to allocate another memory block within the query object memory for
* this query type.
*/
static int
allocate_query(struct svga_context *svga,
SVGA3dQueryType type,
unsigned len)
{
struct svga_qmem_alloc_entry *alloc_entry;
int slot_index = -1;
unsigned offset;
assert(type < SVGA3D_QUERYTYPE_MAX);
alloc_entry = svga->gb_query_map[type];
if (!alloc_entry) {
/* No query memory block has been allocated for this query type;
* allocate one now.
*/
alloc_entry = allocate_query_block_entry(svga, len);
if (!alloc_entry)
return -1;
svga->gb_query_map[type] = alloc_entry;
}
/* Allocate a slot within the memory block allocated for this query type */
slot_index = allocate_query_slot(svga, alloc_entry);
if (slot_index == -1) {
/* This query memory block is full, allocate another one */
alloc_entry = allocate_query_block_entry(svga, len);
if (!alloc_entry)
return -1;
alloc_entry->next = svga->gb_query_map[type];
svga->gb_query_map[type] = alloc_entry;
slot_index = allocate_query_slot(svga, alloc_entry);
}
assert(slot_index != -1);
offset = slot_index * len + alloc_entry->start_offset;
return offset;
}
/**
* Deallocate memory slot allocated for the specified query
*/
static void
deallocate_query(struct svga_context *svga,
struct svga_query *sq)
{
struct svga_qmem_alloc_entry *alloc_entry;
unsigned slot_index;
unsigned offset = sq->offset;
alloc_entry = svga->gb_query_map[sq->svga_type];
while (alloc_entry) {
if (offset >= alloc_entry->start_offset &&
offset < alloc_entry->start_offset + SVGA_QUERY_MEM_BLOCK_SIZE) {
/* The slot belongs to this memory block, deallocate it */
slot_index = (offset - alloc_entry->start_offset) /
alloc_entry->query_size;
deallocate_query_slot(svga, alloc_entry, slot_index);
alloc_entry = NULL;
} else {
alloc_entry = alloc_entry->next;
}
}
}
/**
* Destroy the gb query object and all the related query structures
*/
static void
destroy_gb_query_obj(struct svga_context *svga)
{
struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
unsigned i;
for (i = 0; i < SVGA3D_QUERYTYPE_MAX; i++) {
struct svga_qmem_alloc_entry *alloc_entry, *next;
alloc_entry = svga->gb_query_map[i];
while (alloc_entry) {
next = alloc_entry->next;
util_bitmask_destroy(alloc_entry->alloc_mask);
FREE(alloc_entry);
alloc_entry = next;
}
svga->gb_query_map[i] = NULL;
}
if (svga->gb_query)
sws->query_destroy(sws, svga->gb_query);
svga->gb_query = NULL;
util_bitmask_destroy(svga->gb_query_alloc_mask);
}
/**
* Define query and create the gb query object if it is not already created.
* There is only one gb query object per context which will be shared by
* queries of all types.
*/
static enum pipe_error
define_query_vgpu10(struct svga_context *svga,
struct svga_query *sq, int resultLen)
{
struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
int qlen;
enum pipe_error ret = PIPE_OK;
SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__);
if (svga->gb_query == NULL) {
/* Create a gb query object */
svga->gb_query = sws->query_create(sws, SVGA_QUERY_MEM_SIZE);
if (!svga->gb_query)
return PIPE_ERROR_OUT_OF_MEMORY;
svga->gb_query_len = SVGA_QUERY_MEM_SIZE;
memset(svga->gb_query_map, 0, sizeof(svga->gb_query_map));
svga->gb_query_alloc_mask = util_bitmask_create();
/* Bind the query object to the context */
if (svga->swc->query_bind(svga->swc, svga->gb_query,
SVGA_QUERY_FLAG_SET) != PIPE_OK) {
svga_context_flush(svga, NULL);
svga->swc->query_bind(svga->swc, svga->gb_query,
SVGA_QUERY_FLAG_SET);
}
}
sq->gb_query = svga->gb_query;
/* The integer ID for this query was already allocated in
* svga_create_query(); allocating a second one here would leak it.
*/
/* Find a slot for this query in the gb object */
qlen = resultLen + sizeof(SVGA3dQueryState);
sq->offset = allocate_query(svga, sq->svga_type, qlen);
if (sq->offset == -1)
return PIPE_ERROR_OUT_OF_MEMORY;
SVGA_DBG(DEBUG_QUERY, " query type=%d qid=0x%x offset=%d\n",
sq->svga_type, sq->id, sq->offset);
/* Send SVGA3D commands to define the query. */
ret = SVGA3D_vgpu10_DefineQuery(svga->swc, sq->id, sq->svga_type, sq->flags);
if (ret != PIPE_OK) {
svga_context_flush(svga, NULL);
ret = SVGA3D_vgpu10_DefineQuery(svga->swc, sq->id, sq->svga_type, sq->flags);
}
if (ret != PIPE_OK)
return PIPE_ERROR_OUT_OF_MEMORY;
ret = SVGA3D_vgpu10_BindQuery(svga->swc, sq->gb_query, sq->id);
if (ret != PIPE_OK) {
svga_context_flush(svga, NULL);
ret = SVGA3D_vgpu10_BindQuery(svga->swc, sq->gb_query, sq->id);
}
assert(ret == PIPE_OK);
ret = SVGA3D_vgpu10_SetQueryOffset(svga->swc, sq->id, sq->offset);
if (ret != PIPE_OK) {
svga_context_flush(svga, NULL);
ret = SVGA3D_vgpu10_SetQueryOffset(svga->swc, sq->id, sq->offset);
}
assert(ret == PIPE_OK);
return PIPE_OK;
}
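/**
* Destroy a VGPU10 query: emit the DestroyQuery command and release
* the query's memory slot in the gb query object.
*/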
static enum pipe_error
destroy_query_vgpu10(struct svga_context *svga, struct svga_query *sq)
{
enum pipe_error ret;
ret = SVGA3D_vgpu10_DestroyQuery(svga->swc, sq->id);
/* Deallocate the memory slot allocated for this query */
deallocate_query(svga, sq);
return ret;
}
/**
* Rebind queries to the context.
*/
static void
rebind_vgpu10_query(struct svga_context *svga)
{
if (svga->swc->query_bind(svga->swc, svga->gb_query,
SVGA_QUERY_FLAG_REF) != PIPE_OK) {
svga_context_flush(svga, NULL);
svga->swc->query_bind(svga->swc, svga->gb_query,
SVGA_QUERY_FLAG_REF);
}
svga->rebind.flags.query = FALSE;
}
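/**
* Begin a VGPU10 query: reset the query state to NEW in the gb query
* memory, rebind the query object if necessary, then emit the
* BeginQuery command.
*/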
static enum pipe_error
begin_query_vgpu10(struct svga_context *svga, struct svga_query *sq)
{
struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
enum pipe_error ret = PIPE_OK;
int status = 0;
sws->fence_reference(sws, &sq->fence, NULL);
/* Initialize the query state to NEW */
status = sws->query_init(sws, sq->gb_query, sq->offset, SVGA3D_QUERYSTATE_NEW);
if (status)
return PIPE_ERROR;
if (svga->rebind.flags.query) {
rebind_vgpu10_query(svga);
}
/* Send the BeginQuery command to the device */
ret = SVGA3D_vgpu10_BeginQuery(svga->swc, sq->id);
if (ret != PIPE_OK) {
svga_context_flush(svga, NULL);
ret = SVGA3D_vgpu10_BeginQuery(svga->swc, sq->id);
}
return ret;
}
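/**
* End a VGPU10 query: emit the EndQuery command; the host delivers
* the result to the query's slot in the gb query memory.
*/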
static enum pipe_error
end_query_vgpu10(struct svga_context *svga, struct svga_query *sq)
{
enum pipe_error ret = PIPE_OK;
if (svga->rebind.flags.query) {
rebind_vgpu10_query(svga);
}
ret = SVGA3D_vgpu10_EndQuery(svga->swc, sq->id);
if (ret != PIPE_OK) {
svga_context_flush(svga, NULL);
ret = SVGA3D_vgpu10_EndQuery(svga->swc, sq->id);
}
return ret;
}
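/**
* Read back a VGPU10 query result from the gb query memory. If the
* result isn't ready and 'wait' is TRUE, waits on the query fence
* until the host marks the query SUCCEEDED or FAILED.
*/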
static boolean
get_query_result_vgpu10(struct svga_context *svga, struct svga_query *sq,
boolean wait, void *result, int resultLen)
{
struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
SVGA3dQueryState queryState;
if (svga->rebind.flags.query) {
rebind_vgpu10_query(svga);
}
sws->query_get_result(sws, sq->gb_query, sq->offset, &queryState, result, resultLen);
if (queryState != SVGA3D_QUERYSTATE_SUCCEEDED && !sq->fence) {
/* We don't have the query result yet, and the query hasn't been
* submitted. We need to submit it now since the GL spec says
* "Querying the state for a given occlusion query forces that
* occlusion query to complete within a finite amount of time."
*/
svga_context_flush(svga, &sq->fence);
}
if (queryState == SVGA3D_QUERYSTATE_PENDING ||
queryState == SVGA3D_QUERYSTATE_NEW) {
if (!wait)
return FALSE;
sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY);
sws->query_get_result(sws, sq->gb_query, sq->offset, &queryState, result, resultLen);
}
assert(queryState == SVGA3D_QUERYSTATE_SUCCEEDED ||
queryState == SVGA3D_QUERYSTATE_FAILED);
return TRUE;
}
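/**
* Create a query object (pipe_context::create_query callback).
* GPU-backed PIPE_QUERY_x types get a device query defined here;
* SVGA_QUERY_x types are driver-side counters with no device object.
*/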
static struct pipe_query *
svga_create_query(struct pipe_context *pipe,
unsigned query_type,
unsigned index)
{
struct svga_context *svga = svga_context(pipe);
struct svga_query *sq;
assert(query_type < SVGA_QUERY_MAX);
sq = CALLOC_STRUCT(svga_query);
if (!sq)
goto fail;
/* Allocate an integer ID for the query */
sq->id = util_bitmask_add(svga->query_id_bm);
if (sq->id == UTIL_BITMASK_INVALID_INDEX)
goto fail;
SVGA_DBG(DEBUG_QUERY, "%s type=%d sq=0x%x id=%d\n", __FUNCTION__,
query_type, sq, sq->id);
switch (query_type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
sq->svga_type = SVGA3D_QUERYTYPE_OCCLUSION;
if (svga_have_vgpu10(svga)) {
define_query_vgpu10(svga, sq, sizeof(SVGADXOcclusionQueryResult));
/* In OpenGL, an occlusion counter query can be used for conditional
* rendering; in DX10, however, only an OCCLUSION_PREDICATE query can
* be used for predication. Hence, we create an occlusion predicate
* query along with the occlusion counter query. When the occlusion
* counter query is used for predication, the associated occlusion
* predicate query is used in the SetPredication command.
*/
sq->predicate = svga_create_query(pipe, PIPE_QUERY_OCCLUSION_PREDICATE, index);
} else {
define_query_vgpu9(svga, sq);
}
break;
case PIPE_QUERY_OCCLUSION_PREDICATE:
if (svga_have_vgpu10(svga)) {
sq->svga_type = SVGA3D_QUERYTYPE_OCCLUSIONPREDICATE;
define_query_vgpu10(svga, sq, sizeof(SVGADXOcclusionPredicateQueryResult));
} else {
sq->svga_type = SVGA3D_QUERYTYPE_OCCLUSION;
define_query_vgpu9(svga, sq);
}
break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_PRIMITIVES_EMITTED:
case PIPE_QUERY_SO_STATISTICS:
assert(svga_have_vgpu10(svga));
sq->svga_type = SVGA3D_QUERYTYPE_STREAMOUTPUTSTATS;
define_query_vgpu10(svga, sq,
sizeof(SVGADXStreamOutStatisticsQueryResult));
break;
case PIPE_QUERY_TIMESTAMP:
assert(svga_have_vgpu10(svga));
sq->svga_type = SVGA3D_QUERYTYPE_TIMESTAMP;
define_query_vgpu10(svga, sq,
sizeof(SVGADXTimestampQueryResult));
break;
case SVGA_QUERY_NUM_DRAW_CALLS:
case SVGA_QUERY_NUM_FALLBACKS:
case SVGA_QUERY_NUM_FLUSHES:
case SVGA_QUERY_NUM_VALIDATIONS:
case SVGA_QUERY_NUM_BUFFERS_MAPPED:
case SVGA_QUERY_NUM_TEXTURES_MAPPED:
case SVGA_QUERY_NUM_BYTES_UPLOADED:
case SVGA_QUERY_COMMAND_BUFFER_SIZE:
case SVGA_QUERY_SURFACE_WRITE_FLUSHES:
case SVGA_QUERY_MEMORY_USED:
case SVGA_QUERY_NUM_SHADERS:
case SVGA_QUERY_NUM_RESOURCES:
case SVGA_QUERY_NUM_STATE_OBJECTS:
case SVGA_QUERY_NUM_SURFACE_VIEWS:
case SVGA_QUERY_NUM_GENERATE_MIPMAP:
case SVGA_QUERY_NUM_READBACKS:
case SVGA_QUERY_NUM_RESOURCE_UPDATES:
case SVGA_QUERY_NUM_BUFFER_UPLOADS:
case SVGA_QUERY_NUM_CONST_BUF_UPDATES:
case SVGA_QUERY_NUM_CONST_UPDATES:
break;
case SVGA_QUERY_FLUSH_TIME:
case SVGA_QUERY_MAP_BUFFER_TIME:
/* These queries need os_time_get() */
svga->hud.uses_time = TRUE;
break;
default:
assert(!"unexpected query type in svga_create_query()");
}
sq->type = query_type;
return &sq->base;
fail:
FREE(sq);
return NULL;
}
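/**
* Destroy a query (pipe_context::destroy_query callback).
* Passing q == NULL destroys the per-context gb query object instead.
*/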
static void
svga_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
{
struct svga_context *svga = svga_context(pipe);
struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
struct svga_query *sq;
if (!q) {
destroy_gb_query_obj(svga);
return;
}
sq = svga_query(q);
SVGA_DBG(DEBUG_QUERY, "%s sq=%p id=%d\n", __FUNCTION__,
sq, sq->id);
switch (sq->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
if (svga_have_vgpu10(svga)) {
/* make sure to also destroy any associated predicate query */
if (sq->predicate)
svga_destroy_query(pipe, sq->predicate);
destroy_query_vgpu10(svga, sq);
} else {
sws->buffer_destroy(sws, sq->hwbuf);
}
sws->fence_reference(sws, &sq->fence, NULL);
break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_PRIMITIVES_EMITTED:
case PIPE_QUERY_SO_STATISTICS:
case PIPE_QUERY_TIMESTAMP:
assert(svga_have_vgpu10(svga));
destroy_query_vgpu10(svga, sq);
sws->fence_reference(sws, &sq->fence, NULL);
break;
case SVGA_QUERY_NUM_DRAW_CALLS:
case SVGA_QUERY_NUM_FALLBACKS:
case SVGA_QUERY_NUM_FLUSHES:
case SVGA_QUERY_NUM_VALIDATIONS:
case SVGA_QUERY_MAP_BUFFER_TIME:
case SVGA_QUERY_NUM_BUFFERS_MAPPED:
case SVGA_QUERY_NUM_TEXTURES_MAPPED:
case SVGA_QUERY_NUM_BYTES_UPLOADED:
case SVGA_QUERY_COMMAND_BUFFER_SIZE:
case SVGA_QUERY_FLUSH_TIME:
case SVGA_QUERY_SURFACE_WRITE_FLUSHES:
case SVGA_QUERY_MEMORY_USED:
case SVGA_QUERY_NUM_SHADERS:
case SVGA_QUERY_NUM_RESOURCES:
case SVGA_QUERY_NUM_STATE_OBJECTS:
case SVGA_QUERY_NUM_SURFACE_VIEWS:
case SVGA_QUERY_NUM_GENERATE_MIPMAP:
case SVGA_QUERY_NUM_READBACKS:
case SVGA_QUERY_NUM_RESOURCE_UPDATES:
case SVGA_QUERY_NUM_BUFFER_UPLOADS:
case SVGA_QUERY_NUM_CONST_BUF_UPDATES:
case SVGA_QUERY_NUM_CONST_UPDATES:
/* nothing */
break;
default:
assert(!"svga: unexpected query type in svga_destroy_query()");
}
/* Free the query id */
util_bitmask_clear(svga->query_id_bm, sq->id);
FREE(sq);
}
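/**
* Begin a query (pipe_context::begin_query callback). GPU-backed
* queries emit BeginQuery commands; driver-side counters just
* snapshot the current HUD value into begin_count.
*/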
static boolean
svga_begin_query(struct pipe_context *pipe, struct pipe_query *q)
{
struct svga_context *svga = svga_context(pipe);
struct svga_query *sq = svga_query(q);
enum pipe_error ret;
assert(sq);
assert(sq->type < SVGA_QUERY_MAX);
SVGA_DBG(DEBUG_QUERY, "%s sq=%p id=%d\n", __FUNCTION__,
sq, sq->id);
/* Need to flush out buffered drawing commands so that they don't
* get counted in the query results.
*/
svga_hwtnl_flush_retry(svga);
switch (sq->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
if (svga_have_vgpu10(svga)) {
ret = begin_query_vgpu10(svga, sq);
/* also need to start the associated occlusion predicate query */
if (sq->predicate) {
enum pipe_error status;
status = begin_query_vgpu10(svga, svga_query(sq->predicate));
assert(status == PIPE_OK);
(void) status;
}
} else {
ret = begin_query_vgpu9(svga, sq);
}
assert(ret == PIPE_OK);
(void) ret;
break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_PRIMITIVES_EMITTED:
case PIPE_QUERY_SO_STATISTICS:
case PIPE_QUERY_TIMESTAMP:
assert(svga_have_vgpu10(svga));
ret = begin_query_vgpu10(svga, sq);
assert(ret == PIPE_OK);
break;
case SVGA_QUERY_NUM_DRAW_CALLS:
sq->begin_count = svga->hud.num_draw_calls;
break;
case SVGA_QUERY_NUM_FALLBACKS:
sq->begin_count = svga->hud.num_fallbacks;
break;
case SVGA_QUERY_NUM_FLUSHES:
sq->begin_count = svga->hud.num_flushes;
break;
case SVGA_QUERY_NUM_VALIDATIONS:
sq->begin_count = svga->hud.num_validations;
break;
case SVGA_QUERY_MAP_BUFFER_TIME:
sq->begin_count = svga->hud.map_buffer_time;
break;
case SVGA_QUERY_NUM_BUFFERS_MAPPED:
sq->begin_count = svga->hud.num_buffers_mapped;
break;
case SVGA_QUERY_NUM_TEXTURES_MAPPED:
sq->begin_count = svga->hud.num_textures_mapped;
break;
case SVGA_QUERY_NUM_BYTES_UPLOADED:
sq->begin_count = svga->hud.num_bytes_uploaded;
break;
case SVGA_QUERY_COMMAND_BUFFER_SIZE:
sq->begin_count = svga->hud.command_buffer_size;
break;
case SVGA_QUERY_FLUSH_TIME:
sq->begin_count = svga->hud.flush_time;
break;
case SVGA_QUERY_SURFACE_WRITE_FLUSHES:
sq->begin_count = svga->hud.surface_write_flushes;
break;
case SVGA_QUERY_NUM_READBACKS:
sq->begin_count = svga->hud.num_readbacks;
break;
case SVGA_QUERY_NUM_RESOURCE_UPDATES:
sq->begin_count = svga->hud.num_resource_updates;
break;
case SVGA_QUERY_NUM_BUFFER_UPLOADS:
sq->begin_count = svga->hud.num_buffer_uploads;
break;
case SVGA_QUERY_NUM_CONST_BUF_UPDATES:
sq->begin_count = svga->hud.num_const_buf_updates;
break;
case SVGA_QUERY_NUM_CONST_UPDATES:
sq->begin_count = svga->hud.num_const_updates;
break;
case SVGA_QUERY_MEMORY_USED:
case SVGA_QUERY_NUM_SHADERS:
case SVGA_QUERY_NUM_RESOURCES:
case SVGA_QUERY_NUM_STATE_OBJECTS:
case SVGA_QUERY_NUM_SURFACE_VIEWS:
case SVGA_QUERY_NUM_GENERATE_MIPMAP:
/* nothing */
break;
default:
assert(!"unexpected query type in svga_begin_query()");
}
svga->sq[sq->type] = sq;
return true;
}
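/**
* End a query (pipe_context::end_query callback). A TIMESTAMP query
* that was never explicitly begun is implicitly begun here, since the
* gallium interface doesn't require begin_query for timestamps.
*/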
static bool
svga_end_query(struct pipe_context *pipe, struct pipe_query *q)
{
struct svga_context *svga = svga_context(pipe);
struct svga_query *sq = svga_query(q);
enum pipe_error ret;
assert(sq);
assert(sq->type < SVGA_QUERY_MAX);
SVGA_DBG(DEBUG_QUERY, "%s sq=%p id=%d\n", __FUNCTION__,
sq, sq->id);
if (sq->type == PIPE_QUERY_TIMESTAMP && svga->sq[sq->type] != sq)
svga_begin_query(pipe, q);
svga_hwtnl_flush_retry(svga);
assert(svga->sq[sq->type] == sq);
switch (sq->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
if (svga_have_vgpu10(svga)) {
ret = end_query_vgpu10(svga, sq);
/* also need to end the associated occlusion predicate query */
if (sq->predicate) {
enum pipe_error status;
status = end_query_vgpu10(svga, svga_query(sq->predicate));
assert(status == PIPE_OK);
(void) status;
}
} else {
ret = end_query_vgpu9(svga, sq);
}
assert(ret == PIPE_OK);
(void) ret;
break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_PRIMITIVES_EMITTED:
case PIPE_QUERY_SO_STATISTICS:
case PIPE_QUERY_TIMESTAMP:
assert(svga_have_vgpu10(svga));
ret = end_query_vgpu10(svga, sq);
assert(ret == PIPE_OK);
break;
case SVGA_QUERY_NUM_DRAW_CALLS:
sq->end_count = svga->hud.num_draw_calls;
break;
case SVGA_QUERY_NUM_FALLBACKS:
sq->end_count = svga->hud.num_fallbacks;
break;
case SVGA_QUERY_NUM_FLUSHES:
sq->end_count = svga->hud.num_flushes;
break;
case SVGA_QUERY_NUM_VALIDATIONS:
sq->end_count = svga->hud.num_validations;
break;
case SVGA_QUERY_MAP_BUFFER_TIME:
sq->end_count = svga->hud.map_buffer_time;
break;
case SVGA_QUERY_NUM_BUFFERS_MAPPED:
sq->end_count = svga->hud.num_buffers_mapped;
break;
case SVGA_QUERY_NUM_TEXTURES_MAPPED:
sq->end_count = svga->hud.num_textures_mapped;
break;
case SVGA_QUERY_NUM_BYTES_UPLOADED:
sq->end_count = svga->hud.num_bytes_uploaded;
break;
case SVGA_QUERY_COMMAND_BUFFER_SIZE:
sq->end_count = svga->hud.command_buffer_size;
break;
case SVGA_QUERY_FLUSH_TIME:
sq->end_count = svga->hud.flush_time;
break;
case SVGA_QUERY_SURFACE_WRITE_FLUSHES:
sq->end_count = svga->hud.surface_write_flushes;
break;
case SVGA_QUERY_NUM_READBACKS:
sq->end_count = svga->hud.num_readbacks;
break;
case SVGA_QUERY_NUM_RESOURCE_UPDATES:
sq->end_count = svga->hud.num_resource_updates;
break;
case SVGA_QUERY_NUM_BUFFER_UPLOADS:
sq->end_count = svga->hud.num_buffer_uploads;
break;
case SVGA_QUERY_NUM_CONST_BUF_UPDATES:
sq->end_count = svga->hud.num_const_buf_updates;
break;
case SVGA_QUERY_NUM_CONST_UPDATES:
sq->end_count = svga->hud.num_const_updates;
break;
case SVGA_QUERY_MEMORY_USED:
case SVGA_QUERY_NUM_SHADERS:
case SVGA_QUERY_NUM_RESOURCES:
case SVGA_QUERY_NUM_STATE_OBJECTS:
case SVGA_QUERY_NUM_SURFACE_VIEWS:
case SVGA_QUERY_NUM_GENERATE_MIPMAP:
/* nothing */
break;
default:
assert(!"unexpected query type in svga_end_query()");
}
svga->sq[sq->type] = NULL;
return true;
}
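/**
* Get a query result (pipe_context::get_query_result callback).
* Returns TRUE if the result is available; returns FALSE only when
* the result is still pending and 'wait' is FALSE.
*/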
static boolean
svga_get_query_result(struct pipe_context *pipe,
struct pipe_query *q,
boolean wait,
union pipe_query_result *vresult)
{
struct svga_screen *svgascreen = svga_screen(pipe->screen);
struct svga_context *svga = svga_context(pipe);
struct svga_query *sq = svga_query(q);
uint64_t *result = (uint64_t *)vresult;
boolean ret = TRUE;
assert(sq);
SVGA_DBG(DEBUG_QUERY, "%s sq=%p id=%d wait: %d\n",
__FUNCTION__, sq, sq->id, wait);
switch (sq->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
if (svga_have_vgpu10(svga)) {
SVGADXOcclusionQueryResult occResult;
ret = get_query_result_vgpu10(svga, sq, wait,
(void *)&occResult, sizeof(occResult));
*result = (uint64_t)occResult.samplesRendered;
} else {
ret = get_query_result_vgpu9(svga, sq, wait, result);
}
break;
case PIPE_QUERY_OCCLUSION_PREDICATE: {
if (svga_have_vgpu10(svga)) {
SVGADXOcclusionPredicateQueryResult occResult;
ret = get_query_result_vgpu10(svga, sq, wait,
(void *)&occResult, sizeof(occResult));
vresult->b = occResult.anySamplesRendered != 0;
} else {
uint64_t count = 0;
ret = get_query_result_vgpu9(svga, sq, wait, &count);
vresult->b = count != 0;
}
break;
}
case PIPE_QUERY_SO_STATISTICS: {
SVGADXStreamOutStatisticsQueryResult sResult;
struct pipe_query_data_so_statistics *pResult =
(struct pipe_query_data_so_statistics *)vresult;
assert(svga_have_vgpu10(svga));
ret = get_query_result_vgpu10(svga, sq, wait,
(void *)&sResult, sizeof(sResult));
pResult->num_primitives_written = sResult.numPrimitivesWritten;
pResult->primitives_storage_needed = sResult.numPrimitivesRequired;
break;
}
case PIPE_QUERY_TIMESTAMP: {
SVGADXTimestampQueryResult sResult;
assert(svga_have_vgpu10(svga));
ret = get_query_result_vgpu10(svga, sq, wait,
(void *)&sResult, sizeof(sResult));
*result = (uint64_t)sResult.timestamp;
break;
}
case PIPE_QUERY_PRIMITIVES_GENERATED: {
SVGADXStreamOutStatisticsQueryResult sResult;
assert(svga_have_vgpu10(svga));
ret = get_query_result_vgpu10(svga, sq, wait,
(void *)&sResult, sizeof sResult);
*result = (uint64_t)sResult.numPrimitivesRequired;
break;
}
case PIPE_QUERY_PRIMITIVES_EMITTED: {
SVGADXStreamOutStatisticsQueryResult sResult;
assert(svga_have_vgpu10(svga));
ret = get_query_result_vgpu10(svga, sq, wait,
(void *)&sResult, sizeof sResult);
*result = (uint64_t)sResult.numPrimitivesWritten;
break;
}
/* These are per-frame counters */
case SVGA_QUERY_NUM_DRAW_CALLS:
case SVGA_QUERY_NUM_FALLBACKS:
case SVGA_QUERY_NUM_FLUSHES:
case SVGA_QUERY_NUM_VALIDATIONS:
case SVGA_QUERY_MAP_BUFFER_TIME:
case SVGA_QUERY_NUM_BUFFERS_MAPPED:
case SVGA_QUERY_NUM_TEXTURES_MAPPED:
case SVGA_QUERY_NUM_BYTES_UPLOADED:
case SVGA_QUERY_COMMAND_BUFFER_SIZE:
case SVGA_QUERY_FLUSH_TIME:
case SVGA_QUERY_SURFACE_WRITE_FLUSHES:
case SVGA_QUERY_NUM_READBACKS:
case SVGA_QUERY_NUM_RESOURCE_UPDATES:
case SVGA_QUERY_NUM_BUFFER_UPLOADS:
case SVGA_QUERY_NUM_CONST_BUF_UPDATES:
case SVGA_QUERY_NUM_CONST_UPDATES:
vresult->u64 = sq->end_count - sq->begin_count;
break;
/* These are running total counters */
case SVGA_QUERY_MEMORY_USED:
vresult->u64 = svgascreen->hud.total_resource_bytes;
break;
case SVGA_QUERY_NUM_SHADERS:
vresult->u64 = svga->hud.num_shaders;
break;
case SVGA_QUERY_NUM_RESOURCES:
vresult->u64 = svgascreen->hud.num_resources;
break;
case SVGA_QUERY_NUM_STATE_OBJECTS:
vresult->u64 = (svga->hud.num_blend_objects +
svga->hud.num_depthstencil_objects +
svga->hud.num_rasterizer_objects +
svga->hud.num_sampler_objects +
svga->hud.num_samplerview_objects +
svga->hud.num_vertexelement_objects);
break;
case SVGA_QUERY_NUM_SURFACE_VIEWS:
vresult->u64 = svga->hud.num_surface_views;
break;
case SVGA_QUERY_NUM_GENERATE_MIPMAP:
vresult->u64 = svga->hud.num_generate_mipmap;
break;
default:
assert(!"unexpected query type in svga_get_query_result");
}
SVGA_DBG(DEBUG_QUERY, "%s result %d\n", __FUNCTION__, *((uint64_t *)vresult));
return ret;
}
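/**
* Set the conditional rendering state (pipe_context::render_condition
* callback). A NULL query disables predication.
*/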
static void
svga_render_condition(struct pipe_context *pipe, struct pipe_query *q,
boolean condition, uint mode)
{
struct svga_context *svga = svga_context(pipe);
struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
struct svga_query *sq = svga_query(q);
SVGA3dQueryId queryId;
enum pipe_error ret;
SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__);
assert(svga_have_vgpu10(svga));
if (sq == NULL) {
queryId = SVGA3D_INVALID_ID;
}
else {
assert(sq->svga_type == SVGA3D_QUERYTYPE_OCCLUSION ||
sq->svga_type == SVGA3D_QUERYTYPE_OCCLUSIONPREDICATE);
if (sq->svga_type == SVGA3D_QUERYTYPE_OCCLUSION) {
assert(sq->predicate);
/* For conditional rendering, make sure to use the associated
* predicate query.
*/
sq = svga_query(sq->predicate);
}
queryId = sq->id;
if ((mode == PIPE_RENDER_COND_WAIT ||
mode == PIPE_RENDER_COND_BY_REGION_WAIT) && sq->fence) {
sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY);
}
}
/*
* If the kernel module doesn't support the predication command,
* we'll just render unconditionally.
* This is probably acceptable for the typical case of occlusion culling.
*/
if (sws->have_set_predication_cmd) {
ret = SVGA3D_vgpu10_SetPredication(svga->swc, queryId,
(uint32) condition);
if (ret != PIPE_OK) {
svga_context_flush(svga, NULL);
ret = SVGA3D_vgpu10_SetPredication(svga->swc, queryId,
(uint32) condition);
}
svga->pred.query_id = queryId;
svga->pred.cond = condition;
}
svga->render_condition = (sq != NULL);
}
/*
* This function is a workaround for the lack of a way to query the
* renderer's time synchronously.
*/
static uint64_t
svga_get_timestamp(struct pipe_context *pipe)
{
struct pipe_query *q = svga_create_query(pipe, PIPE_QUERY_TIMESTAMP, 0);
union pipe_query_result result;
svga_begin_query(pipe, q);
svga_end_query(pipe, q);
svga_get_query_result(pipe, q, TRUE, &result);
svga_destroy_query(pipe, q);
return result.u64;
}
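/**
* pipe_context::set_active_query_state callback.
* Currently a no-op for this driver.
*/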
static void
svga_set_active_query_state(struct pipe_context *pipe, boolean enable)
{
}
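/**
* Plug the query-related functions into the pipe_context.
*/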
void
svga_init_query_functions(struct svga_context *svga)
{
svga->pipe.create_query = svga_create_query;
svga->pipe.destroy_query = svga_destroy_query;
svga->pipe.begin_query = svga_begin_query;
svga->pipe.end_query = svga_end_query;
svga->pipe.get_query_result = svga_get_query_result;
svga->pipe.set_active_query_state = svga_set_active_query_state;
svga->pipe.render_condition = svga_render_condition;
svga->pipe.get_timestamp = svga_get_timestamp;
}