M7350/kernel/drivers/gpu/msm/adreno_snapshot.c

/* Copyright (c) 2012-2013, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#include "kgsl.h"
#include "kgsl_sharedmem.h"
#include "kgsl_snapshot.h"

#include "adreno.h"
#include "adreno_pm4types.h"
#include "a2xx_reg.h"
#include "a3xx_reg.h"

/* Number of dwords of ringbuffer history to record */
#define NUM_DWORDS_OF_RINGBUFFER_HISTORY 100

/* Maintain a list of the objects we see during parsing */

#define SNAPSHOT_OBJ_BUFSIZE 64

#define SNAPSHOT_OBJ_TYPE_IB 0

/* Keep track of how many bytes are frozen after a snapshot and tell the user */
static int snapshot_frozen_objsize;

static struct kgsl_snapshot_obj {
	int type;
	uint32_t gpuaddr;
	phys_addr_t ptbase;
	void *ptr;
	int dwords;
} objbuf[SNAPSHOT_OBJ_BUFSIZE];

/* Pointer to the next open entry in the object list */
static int objbufptr;

/* Push a new buffer object onto the list */
static void push_object(struct kgsl_device *device, int type,
	phys_addr_t ptbase,
	uint32_t gpuaddr, int dwords)
{
	int index;
	void *ptr;

	/*
	 * Sometimes IBs can be reused in the same dump.  Because we parse from
	 * oldest to newest, if we come across an IB that has already been used,
	 * assume that it has been reused and update the list with the newest
	 * size.
	 */

	for (index = 0; index < objbufptr; index++) {
		if (objbuf[index].gpuaddr == gpuaddr &&
			objbuf[index].ptbase == ptbase) {
				objbuf[index].dwords = dwords;
				return;
			}
	}

	if (objbufptr == SNAPSHOT_OBJ_BUFSIZE) {
		KGSL_DRV_ERR(device, "snapshot: too many snapshot objects\n");
		return;
	}

	/*
	 * adreno_convertaddr verifies that the IB size is valid - at least in
	 * the context of it being smaller then the allocated memory space
	 */
	ptr = adreno_convertaddr(device, ptbase, gpuaddr, dwords << 2);

	if (ptr == NULL) {
		KGSL_DRV_ERR(device,
			"snapshot: Can't find GPU address for %x\n", gpuaddr);
		return;
	}

	/* Put it on the list of things to parse */
	objbuf[objbufptr].type = type;
	objbuf[objbufptr].gpuaddr = gpuaddr;
	objbuf[objbufptr].ptbase = ptbase;
	objbuf[objbufptr].dwords = dwords;
	objbuf[objbufptr++].ptr = ptr;
}

/*
 * Return a 1 if the specified object is already on the list of buffers
 * to be dumped
 */

static int find_object(int type, unsigned int gpuaddr, phys_addr_t ptbase)
{
	int index;

	for (index = 0; index < objbufptr; index++) {
		if (objbuf[index].gpuaddr == gpuaddr &&
			objbuf[index].ptbase == ptbase &&
			objbuf[index].type == type)
			return 1;
	}

	return 0;
}

/*
 * This structure keeps track of type0 writes to VSC_PIPE_DATA_ADDRESS_x and
 * VSC_PIPE_DATA_LENGTH_x. When a draw initator is called these registers
 * point to buffers that we need to freeze for a snapshot
 */

static struct {
	unsigned int base;
	unsigned int size;
} vsc_pipe[8];

/*
 * This is the cached value of type0 writes to the VSC_SIZE_ADDRESS which
 * contains the buffer address of the visiblity stream size buffer during a
 * binning pass
 */

static unsigned int vsc_size_address;

/*
 * This struct keeps track of type0 writes to VFD_FETCH_INSTR_0_X and
 * VFD_FETCH_INSTR_1_X registers. When a draw initator is called the addresses
 * and sizes in these registers point to VBOs that we need to freeze for a
 * snapshot
 */

static struct {
	unsigned int base;
	unsigned int stride;
} vbo[16];

/*
 * This is the cached value of type0 writes to VFD_INDEX_MAX.  This will be used
 * to calculate the size of the VBOs when the draw initator is called
 */

static unsigned int vfd_index_max;

/*
 * This is the cached value of type0 writes to VFD_CONTROL_0 which tells us how
 * many VBOs are active when the draw initator is called
 */

static unsigned int vfd_control_0;

/*
 * Cached value of type0 writes to SP_VS_PVT_MEM_ADDR and SP_FS_PVT_MEM_ADDR.
 * This is a buffer that contains private stack information for the shader
 */

static unsigned int sp_vs_pvt_mem_addr;
static unsigned int sp_fs_pvt_mem_addr;

/*
 * Cached value of SP_VS_OBJ_START_REG and SP_FS_OBJ_START_REG.
 */
static unsigned int sp_vs_obj_start_reg;
static unsigned int sp_fs_obj_start_reg;

/*
 * Each load state block has two possible types.  Each type has a different
 * number of dwords per unit.  Use this handy lookup table to make sure
 * we dump the right amount of data from the indirect buffer
 */

static int load_state_unit_sizes[7][2] = {
	{ 2, 4 },
	{ 0, 1 },
	{ 2, 4 },
	{ 0, 1 },
	{ 8, 2 },
	{ 8, 2 },
	{ 8, 2 },
};

static int ib_parse_load_state(struct kgsl_device *device, unsigned int *pkt,
	phys_addr_t ptbase)
{
	unsigned int block, source, type;
	int ret = 0;

	/*
	 * The object here is to find indirect shaders i.e - shaders loaded from
	 * GPU memory instead of directly in the command.  These should be added
	 * to the list of memory objects to dump. So look at the load state
	 * if the block is indirect (source = 4). If so then add the memory
	 * address to the list.  The size of the object differs depending on the
	 * type per the load_state_unit_sizes array above.
	 */

	if (type3_pkt_size(pkt[0]) < 2)
		return 0;

	/*
	 * pkt[1] 18:16 - source
	 * pkt[1] 21:19 - state block
	 * pkt[1] 31:22 - size in units
	 * pkt[2] 0:1 - type
	 * pkt[2] 31:2 - GPU memory address
	 */

	block = (pkt[1] >> 19) & 0x07;
	source = (pkt[1] >> 16) & 0x07;
	type = pkt[2] & 0x03;

	if (source == 4) {
		int unitsize, ret;

		if (type == 0)
			unitsize = load_state_unit_sizes[block][0];
		else
			unitsize = load_state_unit_sizes[block][1];

		/* Freeze the GPU buffer containing the shader */

		ret = kgsl_snapshot_get_object(device, ptbase,
				pkt[2] & 0xFFFFFFFC,
				(((pkt[1] >> 22) & 0x03FF) * unitsize) << 2,
				SNAPSHOT_GPU_OBJECT_SHADER);

		if (ret < 0)
			return -EINVAL;

		snapshot_frozen_objsize += ret;
	}

	return ret;
}

/*
 * This opcode sets the base addresses for the visibilty stream buffer and the
 * visiblity stream size buffer.
 */

static int ib_parse_set_bin_data(struct kgsl_device *device, unsigned int *pkt,
	phys_addr_t ptbase)
{
	int ret;

	if (type3_pkt_size(pkt[0]) < 2)
		return 0;

	/* Visiblity stream buffer */
	ret = kgsl_snapshot_get_object(device, ptbase, pkt[1], 0,
			SNAPSHOT_GPU_OBJECT_GENERIC);

	if (ret < 0)
		return -EINVAL;

	snapshot_frozen_objsize += ret;

	/* visiblity stream size buffer (fixed size 8 dwords) */
	ret = kgsl_snapshot_get_object(device, ptbase, pkt[2], 32,
			SNAPSHOT_GPU_OBJECT_GENERIC);

	if (ret >= 0)
		snapshot_frozen_objsize += ret;

	return ret;
}

/*
 * This opcode writes to GPU memory - if the buffer is written to, there is a
 * good chance that it would be valuable to capture in the snapshot, so mark all
 * buffers that are written to as frozen
 */

static int ib_parse_mem_write(struct kgsl_device *device, unsigned int *pkt,
	phys_addr_t ptbase)
{
	int ret;

	if (type3_pkt_size(pkt[0]) < 1)
		return 0;

	/*
	 * The address is where the data in the rest of this packet is written
	 * to, but since that might be an offset into the larger buffer we need
	 * to get the whole thing. Pass a size of 0 kgsl_snapshot_get_object to
	 * capture the entire buffer.
	 */

	ret = kgsl_snapshot_get_object(device, ptbase, pkt[1] & 0xFFFFFFFC, 0,
		SNAPSHOT_GPU_OBJECT_GENERIC);

	if (ret >= 0)
		snapshot_frozen_objsize += ret;

	return ret;
}

/*
 * The DRAW_INDX opcode sends a draw initator which starts a draw operation in
 * the GPU, so this is the point where all the registers and buffers become
 * "valid".  The DRAW_INDX may also have an index buffer pointer that should be
 * frozen with the others
 */

static int ib_parse_draw_indx(struct kgsl_device *device, unsigned int *pkt,
	phys_addr_t ptbase)
{
	int ret = 0, i;

	if (type3_pkt_size(pkt[0]) < 3)
		return 0;

	/*  DRAW_IDX may have a index buffer pointer */

	if (type3_pkt_size(pkt[0]) > 3) {
		ret = kgsl_snapshot_get_object(device, ptbase, pkt[4], pkt[5],
			SNAPSHOT_GPU_OBJECT_GENERIC);
		if (ret < 0)
			return -EINVAL;

		snapshot_frozen_objsize += ret;
	}

	/*
	 * All of the type0 writes are valid at a draw initiator, so freeze
	 * the various buffers that we are tracking
	 */

	/* First up the visiblity stream buffer */

	for (i = 0; i < ARRAY_SIZE(vsc_pipe); i++) {
		if (vsc_pipe[i].base != 0 && vsc_pipe[i].size != 0) {
			ret = kgsl_snapshot_get_object(device, ptbase,
				vsc_pipe[i].base, vsc_pipe[i].size,
				SNAPSHOT_GPU_OBJECT_GENERIC);
			if (ret < 0)
				return -EINVAL;

			snapshot_frozen_objsize += ret;
		}
	}

	/* Next the visibility stream size buffer */

	if (vsc_size_address) {
		ret = kgsl_snapshot_get_object(device, ptbase,
				vsc_size_address, 32,
				SNAPSHOT_GPU_OBJECT_GENERIC);
		if (ret < 0)
			return -EINVAL;

		snapshot_frozen_objsize += ret;
	}

	/* Next private shader buffer memory */
	if (sp_vs_pvt_mem_addr) {
		ret = kgsl_snapshot_get_object(device, ptbase,
				sp_vs_pvt_mem_addr, 8192,
				SNAPSHOT_GPU_OBJECT_GENERIC);
		if (ret < 0)
			return -EINVAL;

		snapshot_frozen_objsize += ret;
		sp_vs_pvt_mem_addr = 0;
	}

	if (sp_fs_pvt_mem_addr) {
		ret = kgsl_snapshot_get_object(device, ptbase,
				sp_fs_pvt_mem_addr, 8192,
				SNAPSHOT_GPU_OBJECT_GENERIC);
		if (ret < 0)
			return -EINVAL;

		snapshot_frozen_objsize += ret;
		sp_fs_pvt_mem_addr = 0;
	}

	if (sp_vs_obj_start_reg) {
		ret = kgsl_snapshot_get_object(device, ptbase,
			sp_vs_obj_start_reg & 0xFFFFFFE0, 0,
			SNAPSHOT_GPU_OBJECT_GENERIC);
		if (ret < 0)
			return -EINVAL;
		snapshot_frozen_objsize += ret;
		sp_vs_obj_start_reg = 0;
	}

	if (sp_fs_obj_start_reg) {
		ret = kgsl_snapshot_get_object(device, ptbase,
			sp_fs_obj_start_reg & 0xFFFFFFE0, 0,
			SNAPSHOT_GPU_OBJECT_GENERIC);
		if (ret < 0)
			return -EINVAL;
		snapshot_frozen_objsize += ret;
		sp_fs_obj_start_reg = 0;
	}

	/* Finally: VBOs */

	/* The number of active VBOs is stored in VFD_CONTROL_O[31:27] */
	for (i = 0; i < (vfd_control_0) >> 27; i++) {
		int size;

		/*
		 * The size of the VBO is the stride stored in
		 * VFD_FETCH_INSTR_0_X.BUFSTRIDE * VFD_INDEX_MAX. The base
		 * is stored in VFD_FETCH_INSTR_1_X
		 */

		if (vbo[i].base != 0) {
			size = vbo[i].stride * vfd_index_max;

			ret = kgsl_snapshot_get_object(device, ptbase,
				vbo[i].base,
				0, SNAPSHOT_GPU_OBJECT_GENERIC);
			if (ret < 0)
				return -EINVAL;

			snapshot_frozen_objsize += ret;
		}

		vbo[i].base = 0;
		vbo[i].stride = 0;
	}

	vfd_control_0 = 0;
	vfd_index_max = 0;

	return ret;
}

/*
 * Parse all the type3 opcode packets that may contain important information,
 * such as additional GPU buffers to grab or a draw initator
 */

static int ib_parse_type3(struct kgsl_device *device, unsigned int *ptr,
	phys_addr_t ptbase)
{
	int opcode = cp_type3_opcode(*ptr);

	if (opcode == CP_LOAD_STATE)
		return ib_parse_load_state(device, ptr, ptbase);
	else if (opcode == CP_SET_BIN_DATA)
		return ib_parse_set_bin_data(device, ptr, ptbase);
	else if (opcode == CP_MEM_WRITE)
		return ib_parse_mem_write(device, ptr, ptbase);
	else if (opcode == CP_DRAW_INDX)
		return ib_parse_draw_indx(device, ptr, ptbase);

	return 0;
}

/*
 * Parse type0 packets found in the stream.  Some of the registers that are
 * written are clues for GPU buffers that we need to freeze.  Register writes
 * are considred valid when a draw initator is called, so just cache the values
 * here and freeze them when a CP_DRAW_INDX is seen.  This protects against
 * needlessly caching buffers that won't be used during a draw call
 */

static void ib_parse_type0(struct kgsl_device *device, unsigned int *ptr,
	phys_addr_t ptbase)
{
	int size = type0_pkt_size(*ptr);
	int offset = type0_pkt_offset(*ptr);
	int i;

	for (i = 0; i < size - 1; i++, offset++) {

		/* Visiblity stream buffer */

		if (offset >= A3XX_VSC_PIPE_DATA_ADDRESS_0 &&
			offset <= A3XX_VSC_PIPE_DATA_LENGTH_7) {
			int index = offset - A3XX_VSC_PIPE_DATA_ADDRESS_0;

			/* Each bank of address and length registers are
			 * interleaved with an empty register:
			 *
			 * address 0
			 * length 0
			 * empty
			 * address 1
			 * length 1
			 * empty
			 * ...
			 */

			if ((index % 3) == 0)
				vsc_pipe[index / 3].base = ptr[i + 1];
			else if ((index % 3) == 1)
				vsc_pipe[index / 3].size = ptr[i + 1];
		} else if ((offset >= A3XX_VFD_FETCH_INSTR_0_0) &&
			(offset <= A3XX_VFD_FETCH_INSTR_1_F)) {
			int index = offset - A3XX_VFD_FETCH_INSTR_0_0;

			/*
			 * FETCH_INSTR_0_X and FETCH_INSTR_1_X banks are
			 * interleaved as above but without the empty register
			 * in between
			 */

			if ((index % 2) == 0)
				vbo[index >> 1].stride =
					(ptr[i + 1] >> 7) & 0x1FF;
			else
				vbo[index >> 1].base = ptr[i + 1];
		} else {
			/*
			 * Cache various support registers for calculating
			 * buffer sizes
			 */

			switch (offset) {
			case A3XX_VFD_CONTROL_0:
				vfd_control_0 = ptr[i + 1];
				break;
			case A3XX_VFD_INDEX_MAX:
				vfd_index_max = ptr[i + 1];
				break;
			case A3XX_VSC_SIZE_ADDRESS:
				vsc_size_address = ptr[i + 1];
				break;
			case A3XX_SP_VS_PVT_MEM_ADDR_REG:
				sp_vs_pvt_mem_addr = ptr[i + 1];
				break;
			case A3XX_SP_FS_PVT_MEM_ADDR_REG:
				sp_fs_pvt_mem_addr = ptr[i + 1];
				break;
			case A3XX_SP_VS_OBJ_START_REG:
				sp_vs_obj_start_reg = ptr[i + 1];
				break;
			case A3XX_SP_FS_OBJ_START_REG:
				sp_fs_obj_start_reg = ptr[i + 1];
				break;
			}
		}
	}
}

static inline int parse_ib(struct kgsl_device *device, phys_addr_t ptbase,
		unsigned int gpuaddr, unsigned int dwords);

/* Add an IB as a GPU object, but first, parse it to find more goodies within */

static int ib_add_gpu_object(struct kgsl_device *device, phys_addr_t ptbase,
		unsigned int gpuaddr, unsigned int dwords)
{
	int i, ret, rem = dwords;
	unsigned int *src;

	/*
	 * If the object is already in the list, we don't need to parse it again
	 */

	if (kgsl_snapshot_have_object(device, ptbase, gpuaddr, dwords << 2))
		return 0;

	src = (unsigned int *) adreno_convertaddr(device, ptbase, gpuaddr,
		dwords << 2);

	if (src == NULL)
		return -EINVAL;

	for (i = 0; rem > 0; rem--, i++) {
		int pktsize;

		/* If the packet isn't a type 1 or a type 3, then don't bother
		 * parsing it - it is likely corrupted */

		if (!pkt_is_type0(src[i]) && !pkt_is_type3(src[i]))
			break;

		pktsize = type3_pkt_size(src[i]);

		if (!pktsize || (pktsize + 1) > rem)
			break;

		if (pkt_is_type3(src[i])) {
			if (adreno_cmd_is_ib(src[i])) {
				unsigned int gpuaddr = src[i + 1];
				unsigned int size = src[i + 2];

				ret = parse_ib(device, ptbase, gpuaddr, size);

				/* If adding the IB failed then stop parsing */
				if (ret < 0)
					goto done;
			} else {
				ret = ib_parse_type3(device, &src[i], ptbase);
				/*
				 * If the parse function failed (probably
				 * because of a bad decode) then bail out and
				 * just capture the binary IB data
				 */

				if (ret < 0)
					goto done;
			}
		} else if (pkt_is_type0(src[i])) {
			ib_parse_type0(device, &src[i], ptbase);
		}

		i += pktsize;
		rem -= pktsize;
	}

done:
	ret = kgsl_snapshot_get_object(device, ptbase, gpuaddr, dwords << 2,
		SNAPSHOT_GPU_OBJECT_IB);

	if (ret >= 0)
		snapshot_frozen_objsize += ret;

	return ret;
}

/*
 * We want to store the last executed IB1 and IB2 in the static region to ensure
 * that we get at least some information out of the snapshot even if we can't
 * access the dynamic data from the sysfs file.  Push all other IBs on the
 * dynamic list
 */
static inline int parse_ib(struct kgsl_device *device, phys_addr_t ptbase,
		unsigned int gpuaddr, unsigned int dwords)
{
	unsigned int ib1base, ib2base;
	int ret = 0;

	/*
	 * Check the IB address - if it is either the last executed IB1 or the
	 * last executed IB2 then push it into the static blob otherwise put
	 * it in the dynamic list
	 */

	kgsl_regread(device, REG_CP_IB1_BASE, &ib1base);
	kgsl_regread(device, REG_CP_IB2_BASE, &ib2base);

	if (gpuaddr == ib1base || gpuaddr == ib2base)
		push_object(device, SNAPSHOT_OBJ_TYPE_IB, ptbase,
			gpuaddr, dwords);
	else
		ret = ib_add_gpu_object(device, ptbase, gpuaddr, dwords);

	return ret;
}

/* Snapshot the ringbuffer memory */
static int snapshot_rb(struct kgsl_device *device, void *snapshot,
	int remain, void *priv)
{
	struct kgsl_snapshot_rb *header = snapshot;
	unsigned int *data = snapshot + sizeof(*header);
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
	struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;
	unsigned int rptr, *rbptr, ibbase;
	phys_addr_t ptbase;
	int index, size, i;
	int parse_ibs = 0, ib_parse_start;

	/* Get the physical address of the MMU pagetable */
	ptbase = kgsl_mmu_get_current_ptbase(&device->mmu);

	/* Get the current read pointers for the RB */
	kgsl_regread(device, REG_CP_RB_RPTR, &rptr);

	/* Address of the last processed IB */
	kgsl_regread(device, REG_CP_IB1_BASE, &ibbase);

	/*
	 * Figure out the window of ringbuffer data to dump.  First we need to
	 * find where the last processed IB ws submitted.  Start walking back
	 * from the rptr
	 */

	index = rptr;
	rbptr = rb->buffer_desc.hostptr;

	do {
		index--;

		if (index < 0) {
			index = rb->sizedwords - 3;

			/* We wrapped without finding what we wanted */
			if (index < rb->wptr) {
				index = rb->wptr;
				break;
			}
		}

		if (adreno_cmd_is_ib(rbptr[index]) &&
			rbptr[index + 1] == ibbase)
			break;
	} while (index != rb->wptr);

	/*
	 * index points at the last submitted IB. We can only trust that the
	 * memory between the context switch and the hanging IB is valid, so
	 * the next step is to find the context switch before the submission
	 */

	while (index != rb->wptr) {
		index--;

		if (index < 0) {
			index = rb->sizedwords - 2;

			/*
			 * Wrapped without finding the context switch. This is
			 * harmless - we should still have enough data to dump a
			 * valid state
			 */

			if (index < rb->wptr) {
				index = rb->wptr;
				break;
			}
		}

		/* Break if the current packet is a context switch identifier */
		if ((rbptr[index] == cp_nop_packet(1)) &&
			(rbptr[index + 1] == KGSL_CONTEXT_TO_MEM_IDENTIFIER))
			break;
	}

	/*
	 * Index represents the start of the window of interest.  We will try
	 * to dump all buffers between here and the rptr
	 */

	ib_parse_start = index;

	/*
	 * Dump the entire ringbuffer - the parser can choose how much of it to
	 * process
	 */

	size = (rb->sizedwords << 2);

	if (remain < size + sizeof(*header)) {
		KGSL_DRV_ERR(device,
			"snapshot: Not enough memory for the rb section");
		return 0;
	}

	/* Write the sub-header for the section */
	header->start = rb->wptr;
	header->end = rb->wptr;
	header->wptr = rb->wptr;
	header->rbsize = rb->sizedwords;
	header->count = rb->sizedwords;

	/*
	 * Loop through the RB, copying the data and looking for indirect
	 * buffers and MMU pagetable changes
	 */

	index = rb->wptr;
	for (i = 0; i < rb->sizedwords; i++) {
		*data = rbptr[index];

		/*
		 * Only parse IBs between the start and the rptr or the next
		 * context switch, whichever comes first
		 */

		if (parse_ibs == 0 && index == ib_parse_start)
			parse_ibs = 1;
		else if (index == rptr || adreno_rb_ctxtswitch(&rbptr[index]))
			parse_ibs = 0;

		if (parse_ibs && adreno_cmd_is_ib(rbptr[index])) {
			unsigned int ibaddr = rbptr[index + 1];
			unsigned int ibsize = rbptr[index + 2];

			/*
			 * This will return non NULL if the IB happens to be
			 * part of the context memory (i.e - context switch
			 * command buffers)
			 */

			struct kgsl_memdesc *memdesc =
				adreno_find_ctxtmem(device, ptbase, ibaddr,
					ibsize << 2);

			/* IOMMU uses a NOP IB placed in setsate memory */
			if (NULL == memdesc)
				if (kgsl_gpuaddr_in_memdesc(
						&device->mmu.setstate_memory,
						ibaddr, ibsize << 2))
					memdesc = &device->mmu.setstate_memory;
			/*
			 * The IB from CP_IB1_BASE and the IBs for legacy
			 * context switch go into the snapshot all
			 * others get marked at GPU objects
			 */

			if (memdesc != NULL)
				push_object(device, SNAPSHOT_OBJ_TYPE_IB,
					ptbase, ibaddr, ibsize);
			else
				parse_ib(device, ptbase, ibaddr, ibsize);
		}

		index = index + 1;

		if (index == rb->sizedwords)
			index = 0;

		data++;
	}

	/* Return the size of the section */
	return size + sizeof(*header);
}

static int snapshot_capture_mem_list(struct kgsl_device *device, void *snapshot,
			int remain, void *priv)
{
	struct kgsl_snapshot_replay_mem_list *header = snapshot;
	struct kgsl_process_private *private = NULL;
	struct kgsl_process_private *tmp_private;
	phys_addr_t ptbase;
	struct rb_node *node;
	struct kgsl_mem_entry *entry = NULL;
	int num_mem;
	unsigned int *data = snapshot + sizeof(*header);

	ptbase = kgsl_mmu_get_current_ptbase(&device->mmu);
	mutex_lock(&kgsl_driver.process_mutex);
	list_for_each_entry(tmp_private, &kgsl_driver.process_list, list) {
		if (kgsl_mmu_pt_equal(&device->mmu, tmp_private->pagetable,
			ptbase)) {
			private = tmp_private;
			break;
		}
	}
	mutex_unlock(&kgsl_driver.process_mutex);
	if (!private) {
		KGSL_DRV_ERR(device,
		"Failed to get pointer to process private structure\n");
		return 0;
	}
	/* We need to know the number of memory objects that the process has */
	spin_lock(&private->mem_lock);
	for (node = rb_first(&private->mem_rb), num_mem = 0; node; ) {
		entry = rb_entry(node, struct kgsl_mem_entry, node);
		node = rb_next(&entry->node);
		num_mem++;
	}

	if (remain < ((num_mem * 3 * sizeof(unsigned int)) +
			sizeof(*header))) {
		KGSL_DRV_ERR(device,
			"snapshot: Not enough memory for the mem list section");
		spin_unlock(&private->mem_lock);
		return 0;
	}
	header->num_entries = num_mem;
	header->ptbase = (__u32)ptbase;
	/*
	 * Walk throught the memory list and store the
	 * tuples(gpuaddr, size, memtype) in snapshot
	 */
	for (node = rb_first(&private->mem_rb); node; ) {
		entry = rb_entry(node, struct kgsl_mem_entry, node);
		node = rb_next(&entry->node);

		*data++ = entry->memdesc.gpuaddr;
		*data++ = entry->memdesc.size;
		*data++ = (entry->memdesc.priv & KGSL_MEMTYPE_MASK) >>
							KGSL_MEMTYPE_SHIFT;
	}
	spin_unlock(&private->mem_lock);
	return sizeof(*header) + (num_mem * 3 * sizeof(unsigned int));
}

/* Snapshot the memory for an indirect buffer */
static int snapshot_ib(struct kgsl_device *device, void *snapshot,
	int remain, void *priv)
{
	struct kgsl_snapshot_ib *header = snapshot;
	struct kgsl_snapshot_obj *obj = priv;
	unsigned int *src = obj->ptr;
	unsigned int *dst = snapshot + sizeof(*header);
	int i, ret;

	if (remain < (obj->dwords << 2) + sizeof(*header)) {
		KGSL_DRV_ERR(device,
			"snapshot: Not enough memory for the ib section");
		return 0;
	}

	/* Write the sub-header for the section */
	header->gpuaddr = obj->gpuaddr;
	header->ptbase = (__u32)obj->ptbase;
	header->size = obj->dwords;

	/* Write the contents of the ib */
	for (i = 0; i < obj->dwords; i++, src++, dst++) {
		*dst = *src;

		if (pkt_is_type3(*src)) {
			if ((obj->dwords - i) < type3_pkt_size(*src) + 1)
				continue;

			if (adreno_cmd_is_ib(*src))
				ret = parse_ib(device, obj->ptbase, src[1],
					src[2]);
			else
				ret = ib_parse_type3(device, src, obj->ptbase);

			/* Stop parsing if the type3 decode fails */
			if (ret < 0)
				break;
		}
	}

	return (obj->dwords << 2) + sizeof(*header);
}

/* Dump another item on the current pending list */
static void *dump_object(struct kgsl_device *device, int obj, void *snapshot,
	int *remain)
{
	switch (objbuf[obj].type) {
	case SNAPSHOT_OBJ_TYPE_IB:
		snapshot = kgsl_snapshot_add_section(device,
			KGSL_SNAPSHOT_SECTION_IB, snapshot, remain,
			snapshot_ib, &objbuf[obj]);
		break;
	default:
		KGSL_DRV_ERR(device,
			"snapshot: Invalid snapshot object type: %d\n",
			objbuf[obj].type);
		break;
	}

	return snapshot;
}

/* adreno_snapshot - Snapshot the Adreno GPU state
 * @device - KGSL device to snapshot
 * @snapshot - Pointer to the start of memory to write into
 * @remain - A pointer to how many bytes of memory are remaining in the snapshot
 * @hang - set if this snapshot was automatically triggered by a GPU hang
 * This is a hook function called by kgsl_snapshot to snapshot the
 * Adreno specific information for the GPU snapshot.  In turn, this function
 * calls the GPU specific snapshot function to get core specific information.
 */

void *adreno_snapshot(struct kgsl_device *device, void *snapshot, int *remain,
		int hang)
{
	int i;
	uint32_t ibbase, ibsize;
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
	phys_addr_t ptbase;

	/* Reset the list of objects */
	objbufptr = 0;

	snapshot_frozen_objsize = 0;

	/* Clear the caches for the visibilty stream and VBO parsing */

	vfd_control_0 = 0;
	vfd_index_max = 0;
	vsc_size_address = 0;

	memset(vsc_pipe, 0, sizeof(vsc_pipe));
	memset(vbo, 0, sizeof(vbo));

	/* Get the physical address of the MMU pagetable */
	ptbase = kgsl_mmu_get_current_ptbase(&device->mmu);

	/* Dump the ringbuffer */
	snapshot = kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_RB,
		snapshot, remain, snapshot_rb, NULL);

	/*
	 * Add a section that lists (gpuaddr, size, memtype) tuples of the
	 * hanging process
	 */
	snapshot = kgsl_snapshot_add_section(device,
			KGSL_SNAPSHOT_SECTION_MEMLIST, snapshot, remain,
			snapshot_capture_mem_list, NULL);
	/*
	 * Make sure that the last IB1 that was being executed is dumped.
	 * Since this was the last IB1 that was processed, we should have
	 * already added it to the list during the ringbuffer parse but we
	 * want to be double plus sure.
	 */

	kgsl_regread(device, REG_CP_IB1_BASE, &ibbase);
	kgsl_regread(device, REG_CP_IB1_BUFSZ, &ibsize);

	/*
	 * The problem is that IB size from the register is the unprocessed size
	 * of the buffer not the original size, so if we didn't catch this
	 * buffer being directly used in the RB, then we might not be able to
	 * dump the whle thing. Print a warning message so we can try to
	 * figure how often this really happens.
	 */

	if (!find_object(SNAPSHOT_OBJ_TYPE_IB, ibbase, ptbase) && ibsize) {
		push_object(device, SNAPSHOT_OBJ_TYPE_IB, ptbase,
			ibbase, ibsize);
		KGSL_DRV_ERR(device, "CP_IB1_BASE not found in the ringbuffer. "
			"Dumping %x dwords of the buffer.\n", ibsize);
	}

	kgsl_regread(device, REG_CP_IB2_BASE, &ibbase);
	kgsl_regread(device, REG_CP_IB2_BUFSZ, &ibsize);

	/*
	 * Add the last parsed IB2 to the list. The IB2 should be found as we
	 * parse the objects below, but we try to add it to the list first, so
	 * it too can be parsed.  Don't print an error message in this case - if
	 * the IB2 is found during parsing, the list will be updated with the
	 * correct size.
	 */

	if (!find_object(SNAPSHOT_OBJ_TYPE_IB, ibbase, ptbase) && ibsize) {
		push_object(device, SNAPSHOT_OBJ_TYPE_IB, ptbase,
			ibbase, ibsize);
	}

	/*
	 * Go through the list of found objects and dump each one.  As the IBs
	 * are parsed, more objects might be found, and objbufptr will increase
	 */
	for (i = 0; i < objbufptr; i++)
		snapshot = dump_object(device, i, snapshot, remain);

	/* Add GPU specific sections - registers mainly, but other stuff too */
	if (adreno_dev->gpudev->snapshot)
		snapshot = adreno_dev->gpudev->snapshot(adreno_dev, snapshot,
			remain, hang);

	if (snapshot_frozen_objsize)
		KGSL_DRV_ERR(device, "GPU snapshot froze %dKb of GPU buffers\n",
			snapshot_frozen_objsize / 1024);

	return snapshot;
}