| /* Copyright (c) 2012-2014, The Linux Foundation. All rights reserved. |
| * |
| * This program is free software; you can redistribute it and/or modify |
| * it under the terms of the GNU General Public License version 2 and |
| * only version 2 as published by the Free Software Foundation. |
| * |
| * This program is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| * GNU General Public License for more details. |
| */ |
| |
| #include "kgsl.h" |
| #include "kgsl_sharedmem.h" |
| #include "kgsl_snapshot.h" |
| |
| #include "adreno.h" |
| #include "adreno_pm4types.h" |
| #include "a3xx_reg.h" |
| #include "adreno_cp_parser.h" |
| |
| /* Number of dwords of ringbuffer history to record */ |
| #define NUM_DWORDS_OF_RINGBUFFER_HISTORY 100 |
| |
| /* Maintain a list of the objects we see during parsing */ |
| |
| #define SNAPSHOT_OBJ_BUFSIZE 64 |
| |
| #define SNAPSHOT_OBJ_TYPE_IB 0 |
| |
| /* Keep track of how many bytes are frozen after a snapshot and tell the user */ |
| static int snapshot_frozen_objsize; |
| |
| static struct kgsl_snapshot_obj { |
| int type; |
| uint32_t gpuaddr; |
| phys_addr_t ptbase; |
| int dwords; |
| struct kgsl_mem_entry *entry; |
| } objbuf[SNAPSHOT_OBJ_BUFSIZE]; |
| |
| /* Pointer to the next open entry in the object list */ |
| static int objbufptr; |
| |
| /* Push a new buffer object onto the list */ |
| static void push_object(struct kgsl_device *device, int type, |
| phys_addr_t ptbase, |
| uint32_t gpuaddr, int dwords) |
| { |
| int index; |
| struct kgsl_mem_entry *entry; |
| |
| /* |
| * Sometimes IBs can be reused in the same dump. Because we parse from |
| * oldest to newest, if we come across an IB that has already been used, |
| * assume that it has been reused and update the list with the newest |
| * size. |
| */ |
| |
| for (index = 0; index < objbufptr; index++) { |
| if (objbuf[index].gpuaddr == gpuaddr && |
| objbuf[index].ptbase == ptbase) { |
| objbuf[index].dwords = dwords; |
| return; |
| } |
| } |
| |
| if (objbufptr == SNAPSHOT_OBJ_BUFSIZE) { |
| KGSL_DRV_ERR(device, "snapshot: too many snapshot objects\n"); |
| return; |
| } |
| |
| entry = kgsl_get_mem_entry(device, ptbase, gpuaddr, dwords << 2); |
| if (entry == NULL) { |
| KGSL_DRV_ERR(device, |
| "snapshot: Can't find entry for %X\n", gpuaddr); |
| return; |
| } |
| |
| /* Put it on the list of things to parse */ |
| objbuf[objbufptr].type = type; |
| objbuf[objbufptr].gpuaddr = gpuaddr; |
| objbuf[objbufptr].ptbase = ptbase; |
| objbuf[objbufptr].dwords = dwords; |
| objbuf[objbufptr++].entry = entry; |
| } |
| |
| /* |
| * Return a 1 if the specified object is already on the list of buffers |
| * to be dumped |
| */ |
| |
| static int find_object(int type, unsigned int gpuaddr, phys_addr_t ptbase) |
| { |
| int index; |
| |
| for (index = 0; index < objbufptr; index++) { |
| if (objbuf[index].gpuaddr == gpuaddr && |
| objbuf[index].ptbase == ptbase && |
| objbuf[index].type == type) |
| return 1; |
| } |
| |
| return 0; |
| } |
| |
| /* |
| * snapshot_freeze_obj_list() - Take a list of ib objects and freeze their |
| * memory for snapshot |
| * @device: Device being snapshotted |
| * @ptbase: The pagetable base of the process to which IB belongs |
| * @ib_obj_list: List of the IB objects |
| * |
| * Returns 0 on success else error code |
| */ |
| static int snapshot_freeze_obj_list(struct kgsl_device *device, |
| phys_addr_t ptbase, struct adreno_ib_object_list *ib_obj_list) |
| { |
| int ret = 0; |
| struct adreno_ib_object *ib_objs; |
| unsigned int ib2base; |
| struct adreno_device *adreno_dev = ADRENO_DEVICE(device); |
| int i; |
| |
| adreno_readreg(adreno_dev, ADRENO_REG_CP_IB2_BASE, &ib2base); |
| |
| for (i = 0; i < ib_obj_list->num_objs; i++) { |
| int temp_ret; |
| int index; |
| int freeze = 1; |
| |
| ib_objs = &(ib_obj_list->obj_list[i]); |
| /* Make sure this object is not going to be saved statically */ |
| for (index = 0; index < objbufptr; index++) { |
| if ((objbuf[index].gpuaddr <= ib_objs->gpuaddr) && |
| ((objbuf[index].gpuaddr + |
| (objbuf[index].dwords << 2)) >= |
| (ib_objs->gpuaddr + ib_objs->size)) && |
| (objbuf[index].ptbase == ptbase)) { |
| freeze = 0; |
| break; |
| } |
| } |
| |
| if (freeze) { |
| /* Save current IB2 statically */ |
| if (ib2base == ib_objs->gpuaddr) { |
| push_object(device, SNAPSHOT_OBJ_TYPE_IB, |
| ptbase, ib_objs->gpuaddr, ib_objs->size >> 2); |
| } else { |
| temp_ret = kgsl_snapshot_get_object(device, |
| ptbase, ib_objs->gpuaddr, ib_objs->size, |
| ib_objs->snapshot_obj_type); |
| if (temp_ret < 0) { |
| if (ret >= 0) |
| ret = temp_ret; |
| } else { |
| snapshot_frozen_objsize += temp_ret; |
| } |
| } |
| } |
| } |
| return ret; |
| } |
| |
| /* |
| * We want to store the last executed IB1 and IB2 in the static region to ensure |
| * that we get at least some information out of the snapshot even if we can't |
| * access the dynamic data from the sysfs file. Push all other IBs on the |
| * dynamic list |
| */ |
| static inline int parse_ib(struct kgsl_device *device, phys_addr_t ptbase, |
| unsigned int gpuaddr, unsigned int dwords) |
| { |
| struct adreno_device *adreno_dev = ADRENO_DEVICE(device); |
| unsigned int ib1base; |
| int ret = 0; |
| struct adreno_ib_object_list *ib_obj_list; |
| |
| /* |
| * Check the IB address - if it is either the last executed IB1 |
| * then push it into the static blob otherwise put it in the dynamic |
| * list |
| */ |
| |
| adreno_readreg(adreno_dev, ADRENO_REG_CP_IB1_BASE, &ib1base); |
| |
| if (gpuaddr == ib1base) { |
| push_object(device, SNAPSHOT_OBJ_TYPE_IB, ptbase, |
| gpuaddr, dwords); |
| goto done; |
| } |
| |
| if (kgsl_snapshot_have_object(device, ptbase, gpuaddr, dwords << 2)) |
| goto done; |
| |
| ret = adreno_ib_create_object_list(device, ptbase, |
| gpuaddr, dwords, &ib_obj_list); |
| if (ret) |
| goto done; |
| |
| ret = kgsl_snapshot_add_ib_obj_list(device, ptbase, ib_obj_list); |
| |
| if (ret) |
| adreno_ib_destroy_obj_list(ib_obj_list); |
| done: |
| return ret; |
| } |
| |
| /* Snapshot the ringbuffer memory */ |
| static int snapshot_rb(struct kgsl_device *device, void *snapshot, |
| int remain, void *priv) |
| { |
| struct kgsl_snapshot_rb *header = snapshot; |
| unsigned int *data = snapshot + sizeof(*header); |
| struct adreno_device *adreno_dev = ADRENO_DEVICE(device); |
| struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer; |
| unsigned int rptr, *rbptr, ibbase; |
| phys_addr_t ptbase; |
| int index, size, i; |
| int parse_ibs = 0, ib_parse_start; |
| |
| /* Get the physical address of the MMU pagetable */ |
| ptbase = kgsl_mmu_get_current_ptbase(&device->mmu); |
| |
| /* Get the current read pointers for the RB */ |
| adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_RPTR, &rptr); |
| |
| /* Address of the last processed IB */ |
| adreno_readreg(adreno_dev, ADRENO_REG_CP_IB1_BASE, &ibbase); |
| |
| /* |
| * Figure out the window of ringbuffer data to dump. First we need to |
| * find where the last processed IB ws submitted. Start walking back |
| * from the rptr |
| */ |
| |
| index = rptr; |
| rbptr = rb->buffer_desc.hostptr; |
| |
| do { |
| index--; |
| |
| if (index < 0) { |
| index = rb->sizedwords - 3; |
| |
| /* We wrapped without finding what we wanted */ |
| if (index < rb->wptr) { |
| index = rb->wptr; |
| break; |
| } |
| } |
| |
| if (adreno_cmd_is_ib(rbptr[index]) && |
| rbptr[index + 1] == ibbase) |
| break; |
| } while (index != rb->wptr); |
| |
| /* |
| * index points at the last submitted IB. We can only trust that the |
| * memory between the context switch and the hanging IB is valid, so |
| * the next step is to find the context switch before the submission |
| */ |
| |
| while (index != rb->wptr) { |
| index--; |
| |
| if (index < 0) { |
| index = rb->sizedwords - 2; |
| |
| /* |
| * Wrapped without finding the context switch. This is |
| * harmless - we should still have enough data to dump a |
| * valid state |
| */ |
| |
| if (index < rb->wptr) { |
| index = rb->wptr; |
| break; |
| } |
| } |
| |
| /* Break if the current packet is a context switch identifier */ |
| if ((rbptr[index] == cp_nop_packet(1)) && |
| (rbptr[index + 1] == KGSL_CONTEXT_TO_MEM_IDENTIFIER)) |
| break; |
| } |
| |
| /* |
| * Index represents the start of the window of interest. We will try |
| * to dump all buffers between here and the rptr |
| */ |
| |
| ib_parse_start = index; |
| |
| /* |
| * Dump the entire ringbuffer - the parser can choose how much of it to |
| * process |
| */ |
| |
| size = (rb->sizedwords << 2); |
| |
| if (remain < size + sizeof(*header)) { |
| KGSL_DRV_ERR(device, |
| "snapshot: Not enough memory for the rb section"); |
| return 0; |
| } |
| |
| /* Write the sub-header for the section */ |
| header->start = rb->wptr; |
| header->end = rb->wptr; |
| header->wptr = rb->wptr; |
| header->rbsize = rb->sizedwords; |
| header->count = rb->sizedwords; |
| |
| /* |
| * Loop through the RB, copying the data and looking for indirect |
| * buffers and MMU pagetable changes |
| */ |
| |
| index = rb->wptr; |
| for (i = 0; i < rb->sizedwords; i++) { |
| *data = rbptr[index]; |
| |
| /* |
| * Only parse IBs between the start and the rptr or the next |
| * context switch, whichever comes first |
| */ |
| |
| if (parse_ibs == 0 && index == ib_parse_start) |
| parse_ibs = 1; |
| else if (index == rptr || adreno_rb_ctxtswitch(&rbptr[index])) |
| parse_ibs = 0; |
| |
| if (parse_ibs && adreno_cmd_is_ib(rbptr[index])) { |
| unsigned int ibaddr = rbptr[index + 1]; |
| unsigned int ibsize = rbptr[index + 2]; |
| struct kgsl_memdesc *memdesc = NULL; |
| |
| /* IOMMU uses a NOP IB placed in setsate memory */ |
| if (kgsl_gpuaddr_in_memdesc( |
| &device->mmu.setstate_memory, |
| ibaddr, ibsize << 2)) |
| memdesc = &device->mmu.setstate_memory; |
| /* |
| * The IB from CP_IB1_BASE and the IBs for legacy |
| * context switch go into the snapshot all |
| * others get marked at GPU objects |
| */ |
| |
| if (memdesc != NULL) |
| push_object(device, SNAPSHOT_OBJ_TYPE_IB, |
| ptbase, ibaddr, ibsize); |
| else |
| parse_ib(device, ptbase, ibaddr, ibsize); |
| } |
| |
| index = index + 1; |
| |
| if (index == rb->sizedwords) |
| index = 0; |
| |
| data++; |
| } |
| |
| /* Return the size of the section */ |
| return size + sizeof(*header); |
| } |
| |
| static int snapshot_capture_mem_list(struct kgsl_device *device, void *snapshot, |
| int remain, void *priv) |
| { |
| struct kgsl_snapshot_replay_mem_list *header = snapshot; |
| struct kgsl_process_private *private = NULL; |
| struct kgsl_process_private *tmp_private; |
| phys_addr_t ptbase; |
| struct rb_node *node; |
| struct kgsl_mem_entry *entry = NULL; |
| int num_mem; |
| unsigned int *data = snapshot + sizeof(*header); |
| |
| ptbase = kgsl_mmu_get_current_ptbase(&device->mmu); |
| mutex_lock(&kgsl_driver.process_mutex); |
| list_for_each_entry(tmp_private, &kgsl_driver.process_list, list) { |
| if (kgsl_mmu_pt_equal(&device->mmu, tmp_private->pagetable, |
| ptbase)) { |
| private = tmp_private; |
| break; |
| } |
| } |
| mutex_unlock(&kgsl_driver.process_mutex); |
| if (!private) { |
| KGSL_DRV_ERR(device, |
| "Failed to get pointer to process private structure\n"); |
| return 0; |
| } |
| /* We need to know the number of memory objects that the process has */ |
| spin_lock(&private->mem_lock); |
| for (node = rb_first(&private->mem_rb), num_mem = 0; node; ) { |
| entry = rb_entry(node, struct kgsl_mem_entry, node); |
| node = rb_next(&entry->node); |
| num_mem++; |
| } |
| |
| if (remain < ((num_mem * 3 * sizeof(unsigned int)) + |
| sizeof(*header))) { |
| KGSL_DRV_ERR(device, |
| "snapshot: Not enough memory for the mem list section"); |
| spin_unlock(&private->mem_lock); |
| return 0; |
| } |
| header->num_entries = num_mem; |
| header->ptbase = (__u32)ptbase; |
| /* |
| * Walk throught the memory list and store the |
| * tuples(gpuaddr, size, memtype) in snapshot |
| */ |
| for (node = rb_first(&private->mem_rb); node; ) { |
| entry = rb_entry(node, struct kgsl_mem_entry, node); |
| node = rb_next(&entry->node); |
| |
| *data++ = entry->memdesc.gpuaddr; |
| *data++ = entry->memdesc.size; |
| *data++ = (entry->memdesc.flags & KGSL_MEMTYPE_MASK) >> |
| KGSL_MEMTYPE_SHIFT; |
| } |
| spin_unlock(&private->mem_lock); |
| return sizeof(*header) + (num_mem * 3 * sizeof(unsigned int)); |
| } |
| |
| /* Snapshot the memory for an indirect buffer */ |
| static int snapshot_ib(struct kgsl_device *device, void *snapshot, |
| int remain, void *priv) |
| { |
| struct kgsl_snapshot_ib *header = snapshot; |
| struct kgsl_snapshot_obj *obj = priv; |
| struct adreno_device *adreno_dev = ADRENO_DEVICE(device); |
| unsigned int *src; |
| unsigned int *dst = snapshot + sizeof(*header); |
| struct adreno_ib_object_list *ib_obj_list; |
| unsigned int ib1base; |
| |
| src = kgsl_gpuaddr_to_vaddr(&obj->entry->memdesc, obj->gpuaddr); |
| if (src == NULL) { |
| KGSL_DRV_ERR(device, |
| "snapshot: Unable to map object 0x%X into the kernel\n", |
| obj->gpuaddr); |
| return 0; |
| } |
| |
| adreno_readreg(adreno_dev, ADRENO_REG_CP_IB1_BASE, &ib1base); |
| |
| if (remain < (obj->dwords << 2) + sizeof(*header)) { |
| KGSL_DRV_ERR(device, |
| "snapshot: Not enough memory for the ib section"); |
| return 0; |
| } |
| |
| /* only do this for IB1 because the IB2's are part of IB1 objects */ |
| if (ib1base == obj->gpuaddr) { |
| if (!adreno_ib_create_object_list(device, obj->ptbase, |
| obj->gpuaddr, obj->dwords, |
| &ib_obj_list)) { |
| /* freeze the IB objects in the IB */ |
| snapshot_freeze_obj_list(device, obj->ptbase, |
| ib_obj_list); |
| adreno_ib_destroy_obj_list(ib_obj_list); |
| } |
| } |
| |
| /* Write the sub-header for the section */ |
| header->gpuaddr = obj->gpuaddr; |
| header->ptbase = (__u32)obj->ptbase; |
| header->size = obj->dwords; |
| |
| /* Write the contents of the ib */ |
| memcpy((void *)dst, (void *)src, obj->dwords << 2); |
| /* Write the contents of the ib */ |
| |
| return (obj->dwords << 2) + sizeof(*header); |
| } |
| |
| /* Dump another item on the current pending list */ |
| static void *dump_object(struct kgsl_device *device, int obj, void *snapshot, |
| int *remain) |
| { |
| switch (objbuf[obj].type) { |
| case SNAPSHOT_OBJ_TYPE_IB: |
| snapshot = kgsl_snapshot_add_section(device, |
| KGSL_SNAPSHOT_SECTION_IB, snapshot, remain, |
| snapshot_ib, &objbuf[obj]); |
| if (objbuf[obj].entry) { |
| kgsl_memdesc_unmap(&(objbuf[obj].entry->memdesc)); |
| kgsl_mem_entry_put(objbuf[obj].entry); |
| } |
| break; |
| default: |
| KGSL_DRV_ERR(device, |
| "snapshot: Invalid snapshot object type: %d\n", |
| objbuf[obj].type); |
| break; |
| } |
| |
| return snapshot; |
| } |
| |
| /* adreno_snapshot - Snapshot the Adreno GPU state |
| * @device - KGSL device to snapshot |
| * @snapshot - Pointer to the start of memory to write into |
| * @remain - A pointer to how many bytes of memory are remaining in the snapshot |
| * @hang - set if this snapshot was automatically triggered by a GPU hang |
| * This is a hook function called by kgsl_snapshot to snapshot the |
| * Adreno specific information for the GPU snapshot. In turn, this function |
| * calls the GPU specific snapshot function to get core specific information. |
| */ |
| |
| void *adreno_snapshot(struct kgsl_device *device, void *snapshot, int *remain, |
| int hang) |
| { |
| int i; |
| uint32_t ibbase, ibsize; |
| struct adreno_device *adreno_dev = ADRENO_DEVICE(device); |
| phys_addr_t ptbase; |
| |
| /* Reset the list of objects */ |
| objbufptr = 0; |
| |
| snapshot_frozen_objsize = 0; |
| |
| /* Get the physical address of the MMU pagetable */ |
| ptbase = kgsl_mmu_get_current_ptbase(&device->mmu); |
| |
| /* Dump the ringbuffer */ |
| snapshot = kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_RB, |
| snapshot, remain, snapshot_rb, NULL); |
| |
| /* |
| * Add a section that lists (gpuaddr, size, memtype) tuples of the |
| * hanging process |
| */ |
| snapshot = kgsl_snapshot_add_section(device, |
| KGSL_SNAPSHOT_SECTION_MEMLIST, snapshot, remain, |
| snapshot_capture_mem_list, NULL); |
| /* |
| * Make sure that the last IB1 that was being executed is dumped. |
| * Since this was the last IB1 that was processed, we should have |
| * already added it to the list during the ringbuffer parse but we |
| * want to be double plus sure. |
| */ |
| |
| adreno_readreg(adreno_dev, ADRENO_REG_CP_IB1_BASE, &ibbase); |
| adreno_readreg(adreno_dev, ADRENO_REG_CP_IB1_BUFSZ, &ibsize); |
| |
| /* |
| * The problem is that IB size from the register is the unprocessed size |
| * of the buffer not the original size, so if we didn't catch this |
| * buffer being directly used in the RB, then we might not be able to |
| * dump the whle thing. Print a warning message so we can try to |
| * figure how often this really happens. |
| */ |
| |
| if (!find_object(SNAPSHOT_OBJ_TYPE_IB, ibbase, ptbase) && ibsize) { |
| push_object(device, SNAPSHOT_OBJ_TYPE_IB, ptbase, |
| ibbase, ibsize); |
| KGSL_DRV_ERR(device, "CP_IB1_BASE not found in the ringbuffer. " |
| "Dumping %x dwords of the buffer.\n", ibsize); |
| } |
| |
| adreno_readreg(adreno_dev, ADRENO_REG_CP_IB2_BASE, &ibbase); |
| adreno_readreg(adreno_dev, ADRENO_REG_CP_IB2_BUFSZ, &ibsize); |
| |
| /* |
| * Add the last parsed IB2 to the list. The IB2 should be found as we |
| * parse the objects below, but we try to add it to the list first, so |
| * it too can be parsed. Don't print an error message in this case - if |
| * the IB2 is found during parsing, the list will be updated with the |
| * correct size. |
| */ |
| |
| if (!find_object(SNAPSHOT_OBJ_TYPE_IB, ibbase, ptbase) && ibsize) { |
| push_object(device, SNAPSHOT_OBJ_TYPE_IB, ptbase, |
| ibbase, ibsize); |
| } |
| |
| /* |
| * Go through the list of found objects and dump each one. As the IBs |
| * are parsed, more objects might be found, and objbufptr will increase |
| */ |
| for (i = 0; i < objbufptr; i++) |
| snapshot = dump_object(device, i, snapshot, remain); |
| |
| /* Add GPU specific sections - registers mainly, but other stuff too */ |
| if (adreno_dev->gpudev->snapshot) |
| snapshot = adreno_dev->gpudev->snapshot(adreno_dev, snapshot, |
| remain, hang); |
| |
| if (snapshot_frozen_objsize) |
| KGSL_DRV_ERR(device, "GPU snapshot froze %dKb of GPU buffers\n", |
| snapshot_frozen_objsize / 1024); |
| |
| /* |
| * Queue a work item that will save the IB data in snapshot into |
| * static memory to prevent loss of data due to overwriting of |
| * memory |
| */ |
| queue_work(device->work_queue, &device->snapshot_obj_ws); |
| |
| return snapshot; |
| } |