/**********************************************************************
**                                                                   **
**                I N T E L   P R O P R I E T A R Y                  **
**                                                                   **
**   COPYRIGHT (c) 2004 - 2005  BY  INTEL  CORPORATION.  ALL         **
**   RIGHTS RESERVED.   NO PART OF THIS PROGRAM OR PUBLICATION MAY   **
**   BE  REPRODUCED,   TRANSMITTED,   TRANSCRIBED,   STORED  IN  A   **
**   RETRIEVAL SYSTEM, OR TRANSLATED INTO ANY LANGUAGE OR COMPUTER   **
**   LANGUAGE IN ANY FORM OR BY ANY MEANS, ELECTRONIC, MECHANICAL,   **
**   MAGNETIC,  OPTICAL,  CHEMICAL, MANUAL, OR OTHERWISE,  WITHOUT   **
**   THE PRIOR WRITTEN PERMISSION OF :                               **
**                                                                   **
**                      INTEL  CORPORATION                           **
**                                                                   **
**                2200 MISSION COLLEGE BOULEVARD                     **
**                                                                   **
**             SANTA  CLARA,  CALIFORNIA  95052-8119                 **
**                                                                   **
**********************************************************************/

/**********************************************************************
**                                                                   **
** INTEL CORPORATION PROPRIETARY INFORMATION                         **
** This software is supplied under the terms of a license agreement  **
** with Intel Corporation and may not be copied nor disclosed        **
** except in accordance with the terms of that agreement.            **
**                                                                   **
** Module Name:                                                      **
**   dmaengine.c                                                     **
**                                                                   **
** Abstract:                                                         **
**                                                                   **
**********************************************************************/

#include <linux/init.h>
#include <linux/module.h>
#ifndef __VMKERNEL_MODULE__
#include <linux/device.h>
#include <linux/hardirq.h>
#include <linux/percpu.h>
#endif // __VMKERNEL_MODULE__
#include <linux/dmaengine.h>
#include <linux/spinlock.h>
#include "debug.h"
#include "cb_list.h"
#ifdef __VMKERNEL_MODULE__
#include "ioat_compat.h"
#endif // __VMKERNEL_MODULE__

/* Every DMA device added by dma_async_device_register() is linked here. */
static LIST_HEAD(dma_device_list);
/* Every client created by dma_async_client_register() is linked here. */
static LIST_HEAD(dma_client_list);

#ifndef __VMKERNEL_MODULE__
/*
 * Per-CPU completion, initialised for each online CPU in dma_bus_init() and
 * exported at the bottom of this file.  The only waiter in this file sits in
 * the dma_poll() code that is currently compiled out with #if 0.
 */
DEFINE_PER_CPU(struct completion, kick_dma_poll);

#ifdef INSTRUMENTATION
#include <linux/vmalloc.h>
/* Shared profiling state; allocated and zeroed in dma_bus_init(). */
struct ioat_perf *ioat_perf_instr;
void Reset_Counters(void);
/*
 * Fixed amounts subtracted from the TSC and counter-1 figures reported by
 * show_prof_data().  NOTE(review): presumably the measured cost of the
 * instrumentation itself -- confirm where these values came from.
 */
#define IOAT_TSC_OVH            2900
#define IOAT_CNTR1_OVH          42
#endif


/* --- sysfs implementation --- */

/*
 * sysfs show handler for the "memcpy_count" attribute.
 *
 * @cd:  class device embedded in the struct dma_chan being queried
 * @buf: output buffer supplied by sysfs
 *
 * Prints chan->memcpy_count in decimal followed by a newline.
 *
 * Returns the number of characters placed in @buf, excluding the
 * terminating NUL, so that the NUL is not delivered to the reader as data.
 */
static ssize_t show_memcpy_count(struct class_device *cd, char *buf)
{
	struct dma_chan *chan = container_of(cd, struct dma_chan, class_dev);

	return sprintf(buf, "%lu\n", chan->memcpy_count);
}

/*
 * sysfs show handler for the "bytes_transferred" attribute.
 *
 * @cd:  class device embedded in the struct dma_chan being queried
 * @buf: output buffer supplied by sysfs
 *
 * Prints chan->bytes_transferred in decimal followed by a newline.
 *
 * Returns the number of characters placed in @buf, excluding the
 * terminating NUL, so that the NUL is not delivered to the reader as data.
 */
static ssize_t show_bytes_transferred(struct class_device *cd, char *buf)
{
	struct dma_chan *chan = container_of(cd, struct dma_chan, class_dev);

	return sprintf(buf, "%lu\n", chan->bytes_transferred);
}

/*
 * sysfs show handler for the "in_use" attribute.
 *
 * @cd:  class device embedded in the struct dma_chan being queried
 * @buf: output buffer supplied by sysfs
 *
 * Prints "1" when the channel currently has a client attached
 * (chan->client is non-NULL) and "0" otherwise, followed by a newline.
 *
 * Returns the number of characters placed in @buf, excluding the
 * terminating NUL, so that the NUL is not delivered to the reader as data.
 */
static ssize_t show_in_use(struct class_device *cd, char *buf)
{
	struct dma_chan *chan = container_of(cd, struct dma_chan, class_dev);

	return sprintf(buf, "%d\n", (chan->client ? 1 : 0));
}

/*
 * sysfs show handler for the "min_copy_size" attribute.
 *
 * @cd:  class device embedded in the struct dma_chan being queried
 * @buf: output buffer supplied by sysfs
 *
 * Prints chan->min_copy_size in decimal followed by a newline.
 *
 * Returns the number of characters placed in @buf, excluding the
 * terminating NUL, so that the NUL is not delivered to the reader as data.
 */
static ssize_t show_min_hw_copy_size(struct class_device *cd, char *buf)
{
	struct dma_chan *chan = container_of(cd, struct dma_chan, class_dev);

	return sprintf(buf, "%d\n", chan->min_copy_size);
}

/*
 * sysfs store handler for the "min_copy_size" attribute.
 *
 * @cd:    class device embedded in the struct dma_chan being updated
 * @buf:   text written by the user
 * @count: number of bytes in @buf
 *
 * Parses @buf with simple_strtoul() (base auto-detected) and stores the
 * result in chan->min_copy_size.  The whole write is always reported as
 * consumed, so @count is returned unchanged.
 */
static ssize_t store_min_hw_copy_size(struct class_device *cd, const char *buf, size_t count)
{
	struct dma_chan *chan;
	unsigned long requested;

	chan = container_of(cd, struct dma_chan, class_dev);
	requested = simple_strtoul(buf, NULL, 0);
	chan->min_copy_size = requested;

	return count;
}

#ifdef INSTRUMENTATION

/*
 * sysfs show handler for the "prof_location" attribute.
 *
 * @cd:  class device the attribute hangs off (not used; the profiling
 *       state is the single global ioat_perf_instr)
 * @buf: output buffer supplied by sysfs
 *
 * Prints ioat_perf_instr->prof_location in hexadecimal followed by a
 * newline.
 *
 * Returns the number of characters placed in @buf, excluding the
 * terminating NUL, so that the NUL is not delivered to the reader as data.
 */
static ssize_t show_prof_location(struct class_device *cd, char *buf)
{
	return sprintf(buf, "%x\n", ioat_perf_instr->prof_location);
}

/*
 * sysfs store handler for the "prof_location" attribute.
 *
 * @cd:    class device the attribute hangs off (not used)
 * @buf:   text written by the user
 * @count: number of bytes in @buf
 *
 * Parses @buf with simple_strtoul() (base auto-detected) and stores the
 * result in ioat_perf_instr->prof_location, the mask tested by
 * Start_Instrumentation() and End_Instrumentation().  Returns @count.
 */
static ssize_t store_prof_location(struct class_device *cd, const char *buf, size_t count)
{
	unsigned long mask = simple_strtoul(buf, NULL, 0);

	ioat_perf_instr->prof_location = mask;
	return count;
}

/*
 * sysfs store handler for the "prof_reset" attribute.
 *
 * @cd:    class device the attribute hangs off (not used)
 * @buf:   text written by the user (ignored; any write triggers the reset)
 * @count: number of bytes in @buf
 *
 * Zeroes the software profiling fields in ioat_perf_instr and then clears
 * the hardware counters through Reset_Counters().  Returns @count.
 */
static ssize_t store_prof_reset(struct class_device *cd, const char *buf, size_t count)
{
	struct ioat_perf *perf = ioat_perf_instr;

	/* Profiling selection mask. */
	perf->prof_location = 0;

	/* Last sampled counter values. */
	perf->cntr_val1 = 0;
	perf->cntr_val2 = 0;

	/* Time-stamp bookkeeping. */
	perf->begin_tsc = 0;
	perf->end_tsc = 0;
	perf->tsc_delta = 0;

	/* Sample counts. */
	perf->tsc_count = 0;
	perf->force_count = 0;

	Reset_Counters();

	return count;
}

/*
 * sysfs show handler for the "prof_data" attribute.
 *
 * @cd:  class device the attribute hangs off (not used; the data comes from
 *       the single global ioat_perf_instr)
 * @buf: output buffer supplied by sysfs
 *
 * Reports the raw profiling fields together with derived averages:
 *   - per-sample averages of tsc_delta and cntr_val1 (divided by tsc_count),
 *     with and without the fixed IOAT_TSC_OVH / IOAT_CNTR1_OVH subtracted;
 *   - "force" averages: the totals minus the overhead for every sample,
 *     divided by force_count.
 * Every derived value stays 0 when its divisor is 0.
 *
 * Returns the number of characters placed in @buf, excluding the
 * terminating NUL, so that the NUL is not delivered to the reader as data.
 */
static ssize_t show_prof_data(struct class_device *cd, char *buf)
{
	uint32_t avg_tsc_delta = 0, avg_cntr1 = 0;
	uint32_t tsc_count = 0, force_count = 0;
	uint32_t avg_tsc_force = 0, avg_cntr1_force = 0;
	uint32_t avg_tsc_minus_ovh = 0, avg_cntr1_minus_ovh = 0;

	if (ioat_perf_instr->tsc_count > 0)
		tsc_count = ioat_perf_instr->tsc_count;

	if (ioat_perf_instr->force_count > 0)
		force_count = ioat_perf_instr->force_count;

	if (tsc_count > 0) {
		/*
		 * NOTE(review): the subtractions are unsigned, so an average
		 * smaller than the overhead constant wraps around instead of
		 * going negative -- confirm whether that can happen.
		 */
		avg_tsc_delta = ioat_perf_instr->tsc_delta/tsc_count;
		avg_tsc_minus_ovh = avg_tsc_delta - IOAT_TSC_OVH;
		avg_cntr1 = ioat_perf_instr->cntr_val1/tsc_count;
		avg_cntr1_minus_ovh = avg_cntr1 - IOAT_CNTR1_OVH;
	}
	if (force_count > 0) {
		/*
		 * Kept as two steps on purpose: the difference is narrowed to
		 * 32 bits before it is divided.
		 */
		avg_tsc_force = ioat_perf_instr->tsc_delta - IOAT_TSC_OVH*tsc_count;
		avg_tsc_force = avg_tsc_force/force_count;
		avg_cntr1_force = ioat_perf_instr->cntr_val1 - IOAT_CNTR1_OVH*tsc_count;
		avg_cntr1_force = avg_cntr1_force/force_count;
	}

	return sprintf(buf, "Delta: %u\n Tsc_Cnt: %u\n Cntr1: %u\n Cntr2: %u\n Avg TSC: %u\n Avg Cntr1: %u\n Avg TSC (-ovh): %u\n Avg Cntr1 (-ovh): %u\n Avg TSC (F): %u\n Avg Cntrl (F): %u\n Force Cnt: %u\n",
		ioat_perf_instr->tsc_delta, ioat_perf_instr->tsc_count,
		ioat_perf_instr->cntr_val1, ioat_perf_instr->cntr_val2,
		avg_tsc_delta, avg_cntr1,
		avg_tsc_minus_ovh, avg_cntr1_minus_ovh,
		avg_tsc_force, avg_cntr1_force,
		ioat_perf_instr->force_count);
}

#endif


/*
 * Attributes attached to every class device of the "dma" class (see
 * dma_devclass).  Each entry names the file, its permission bits and the
 * show/store handlers; the table is terminated by __ATTR_NULL.
 */
static struct class_device_attribute dma_class_attrs[] = {
	/* Read-only channel statistics. */
	__ATTR(memcpy_count, S_IRUGO, show_memcpy_count, NULL),
	__ATTR(bytes_transferred, S_IRUGO, show_bytes_transferred, NULL),
	__ATTR(in_use, S_IRUGO, show_in_use, NULL),
	/* Readable by everyone, writable by the owner. */
	__ATTR(min_copy_size, S_IRUGO | S_IWUSR, show_min_hw_copy_size, store_min_hw_copy_size),
#ifdef INSTRUMENTATION
	/* Profiling controls, only present in INSTRUMENTATION builds. */
	__ATTR(prof_data, S_IRUGO, show_prof_data, NULL),
        __ATTR(prof_location, S_IRUGO | S_IWUSR, show_prof_location, store_prof_location),
	/*
	 * NOTE(review): prof_reset is marked readable but has no show
	 * handler -- confirm whether the mode should be S_IWUSR only.
	 */
        __ATTR(prof_reset, S_IRUGO | S_IWUSR, NULL, store_prof_reset),

#endif	
	__ATTR_NULL
};

/*
 * Release callback installed in dma_devclass.
 *
 * @cd: class device being released
 *
 * Placeholder: nothing is done here at present.
 */
static void dma_class_release(struct class_device *cd)
{
}

/*
 * The "dma" device class.  Channels are attached to it in
 * dma_async_device_register(); the class itself is registered in
 * dma_bus_init().
 */
static struct class dma_devclass = {
	.name = "dma",				/* class name */
	.class_dev_attrs = dma_class_attrs,	/* per-channel attribute files */
	.release = dma_class_release,		/* class device release hook */
};
#endif // __VMKERNEL_MODULE__

/* --- client and device registration --- */

/*
 * Claim one free channel for @client.
 *
 * Walks every channel of every device on dma_device_list and takes the first
 * one that has no client and whose device_alloc_chan_resources() hook
 * returns a non-negative value.  The channel is then bound to @client and
 * appended to client->channels.
 *
 * Returns the claimed channel, or NULL when no channel could be obtained.
 * @client must not be NULL (BUG_ON).
 */
static struct dma_chan *
dma_client_chan_alloc(struct dma_client *client)
{
	struct dma_device *dev;
	struct dma_chan *candidate;

	BUG_ON(!client);

	list_for_each_entry(dev, &dma_device_list, global_node) {
		list_for_each_entry(candidate, &dev->channels, device_node) {
			/* Already owned by some client. */
			if (candidate->client)
				continue;

			/* The device could not set this channel up; keep looking. */
			if (candidate->device->device_alloc_chan_resources(candidate) < 0)
				continue;

			candidate->client = client;
			list_add_entry_tail(candidate, &client->channels, client_node);
			return candidate;
		}
	}

	return NULL;
}

static void
dma_client_chan_free(struct dma_chan *chan)
{
	BUG_ON(!chan);

	chan->device->device_free_chan_resources(chan);
	chan->client = NULL;
}

/*
 * Bring every registered client's channel count in line with the number it
 * asked for.
 *
 * For each client on dma_client_list:
 *   - while it holds fewer channels than chans_desired, try to allocate one
 *     more and report it with DMA_RESOURCE_ADDED; stop growing as soon as no
 *     channel is available;
 *   - while it holds more than chans_desired, take the first channel off its
 *     list, report it with DMA_RESOURCE_REMOVED and free it.
 */
static void
dma_chans_rebalance(void)
{
	struct dma_client *cl;
	struct dma_chan *ch;

	list_for_each_entry(cl, &dma_client_list, global_node) {

		/* Grow towards the requested count. */
		while (cl->chan_count < cl->chans_desired) {
			ch = dma_client_chan_alloc(cl);
			if (ch == NULL)
				break;

			cl->chan_count++;
			cl->event_callback(cl, ch, DMA_RESOURCE_ADDED);
		}

		/* Shrink towards the requested count. */
		while (cl->chan_count > cl->chans_desired) {
			ch = list_entry(cl->channels.next, struct dma_chan, client_node);

			cl->chan_count--;
			list_del(&ch->client_node);

			cl->event_callback(cl, ch, DMA_RESOURCE_REMOVED);
			dma_client_chan_free(ch);
		}
	}
}

/*
 * Create a new DMA client.
 *
 * @event_callback: function invoked when channels are added to or removed
 *                  from the client; must not be NULL (BUG_ON)
 *
 * The client starts with an empty channel list and zero desired/held
 * channels, and is appended to dma_client_list.  No channels are handed out
 * here; that happens on a later rebalance.
 *
 * Returns the new client, or NULL if the allocation failed.
 */
struct dma_client *
dma_async_client_register(dma_event_callback event_callback)
{
	struct dma_client *new_client;

	BUG_ON(!event_callback);

	new_client = kmalloc(sizeof(*new_client), GFP_KERNEL);
	if (new_client == NULL)
		return NULL;

	new_client->event_callback = event_callback;
	new_client->chan_count = 0;
	new_client->chans_desired = 0;
	INIT_LIST_HEAD(&new_client->channels);

	list_add_entry_tail(new_client, &dma_client_list, global_node);

	return new_client;
}

void
dma_async_client_unregister(struct dma_client *client)
{
	struct dma_chan *chan, *_chan;

	if (!client)
		return;

	list_for_each_entry_safe(chan, _chan, &client->channels, client_node) {
		dma_client_chan_free(chan);
	}

	list_del(&client->global_node);

	kfree(client);

	dma_chans_rebalance();
}

void
dma_async_client_chan_request(struct dma_client *client, unsigned int number)
{
	BUG_ON(!client);

	client->chans_desired = number;

	dma_chans_rebalance();
}

#ifdef __VMKERNEL_MODULE__
/*
 * Queue a copy of @len bytes from @src to @dest, both given as dma_addr_t.
 * Only built for the VMkernel module.
 *
 * Updates the channel's bytes_transferred and memcpy_count statistics and
 * forwards to the device's device_memcpy_mach_buf_to_buf() hook, whose
 * cookie is returned.
 */
dma_cookie_t
dma_async_memcpy_mach_buf_to_buf(struct dma_chan *chan, dma_addr_t dest,
	dma_addr_t src, size_t len)
{
	struct dma_device *dev = chan->device;

	chan->memcpy_count++;
	chan->bytes_transferred += len;

	return dev->device_memcpy_mach_buf_to_buf(chan, dest, src, len);
}
#endif // __VMKERNEL_MODULE__

/*
 * Queue a copy of @len bytes from buffer @src to buffer @dest on @chan.
 *
 * Updates the channel's bytes_transferred and memcpy_count statistics and
 * forwards to the device's device_memcpy_buf_to_buf() hook, whose cookie is
 * returned.
 */
dma_cookie_t
dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest, void *src,
	size_t len)
{
	struct dma_device *dev = chan->device;

	chan->memcpy_count++;
	chan->bytes_transferred += len;

	return dev->device_memcpy_buf_to_buf(chan, dest, src, len);
}

/*
 * Queue a copy of @len bytes from buffer @kdata into @page at @offset.
 *
 * Updates the channel's bytes_transferred and memcpy_count statistics and
 * forwards to the device's device_memcpy_buf_to_pg() hook, whose cookie is
 * returned.
 */
dma_cookie_t
dma_async_memcpy_buf_to_pg(struct dma_chan *chan, struct page *page,
	unsigned int offset, void *kdata, size_t len)
{
	struct dma_device *dev = chan->device;

	chan->memcpy_count++;
	chan->bytes_transferred += len;

	return dev->device_memcpy_buf_to_pg(chan, page, offset, kdata, len);
}

/*
 * Queue a copy of @len bytes from @src_pg at @src_off to @dest_pg at
 * @dest_off.
 *
 * Updates the channel's bytes_transferred and memcpy_count statistics and
 * forwards to the device's device_memcpy_pg_to_pg() hook, whose cookie is
 * returned.
 */
dma_cookie_t
dma_async_memcpy_pg_to_pg(struct dma_chan *chan, struct page *dest_pg,
	unsigned int dest_off, struct page *src_pg, unsigned int src_off,
	size_t len)
{
	struct dma_device *dev = chan->device;

	chan->memcpy_count++;
	chan->bytes_transferred += len;

	return dev->device_memcpy_pg_to_pg(chan, dest_pg, dest_off,
					   src_pg, src_off, len);
}

/*
 * Ask the device behind @chan to act on its pending operations by calling
 * its device_memcpy_issue_pending() hook.
 *
 * The hook is called as a plain statement: ISO C does not allow a return
 * statement with an expression in a function returning void.
 */
void
dma_async_memcpy_issue_pending(struct dma_chan *chan)
{
	chan->device->device_memcpy_issue_pending(chan);
}

/*
 * Query the state of the operation identified by @cookie on @chan.
 *
 * @last and @used are handed to the device's device_memcpy_complete() hook
 * unchanged; dma_async_wait_for_completion() passes NULL for both.
 *
 * Returns whatever status the device hook reports.
 */
enum dma_status_t
dma_async_memcpy_complete(struct dma_chan *chan, dma_cookie_t cookie, dma_cookie_t *last, dma_cookie_t *used)
{
	struct dma_device *dev = chan->device;
	enum dma_status_t status;

	status = dev->device_memcpy_complete(chan, cookie, last, used);
	return status;
}

/*
 * Register a DMA device and make its channels available to clients.
 *
 * @device: device to register; its channels list must already be populated
 *
 * Outside the VMkernel build the device gets the next dev_id, and each of
 * its channels is numbered, given the default min_copy_size and registered
 * with the "dma" class as "dma<dev>chan<n>".
 *
 * Only after the channels are set up is the device added to
 * dma_device_list and the clients rebalanced.  Rebalancing can hand channels
 * to clients and invoke their event callbacks, so it must not run while
 * chan_id and min_copy_size are still unset.
 *
 * Returns 0 on success, -ENODEV if @device is NULL.
 */
int
dma_async_device_register(struct dma_device *device)
{
#ifndef __VMKERNEL_MODULE__
	/* Only the sysfs setup below uses these. */
	static int id;
	int chancnt = 0;
	struct dma_chan *chan;
#endif // __VMKERNEL_MODULE__

	if (!device)
		return -ENODEV;

#ifndef __VMKERNEL_MODULE__
	device->dev_id = id++;

	/* represent channels in sysfs. Probably want devs too */
	list_for_each_entry(chan, &device->channels, device_node) {
		chan->chan_id = chancnt++;
		chan->class_dev.class = &dma_devclass;
		chan->class_dev.dev = NULL;
		snprintf(chan->class_dev.class_id, BUS_ID_SIZE, "dma%dchan%d",
			device->dev_id, chan->chan_id);

		chan->min_copy_size = DMA_DEFAULT_MIN_COPY_SIZE;
		/*
		 * NOTE(review): the result is still ignored, as before;
		 * dma_async_device_unregister() unregisters every channel
		 * unconditionally, so handling a failure here needs a matching
		 * change there.
		 */
		class_device_register(&chan->class_dev);
	}
#endif // __VMKERNEL_MODULE__

	/* Publish the device only once its channels are fully initialised. */
	list_add_entry_tail(device, &dma_device_list, global_node);

	dma_chans_rebalance();

	return 0;
}

void
dma_async_device_unregister(struct dma_device* device)
{
	struct dma_chan *chan;

	BUG_ON(!device);

	list_for_each_entry(chan, &device->channels, device_node) {
		if (chan->client) {
			list_del(&chan->client_node);
			chan->client->chan_count--;
			chan->client->event_callback(chan->client, chan, DMA_RESOURCE_REMOVED);
			dma_client_chan_free(chan);
		}
#ifndef __VMKERNEL_MODULE__
		class_device_unregister(&chan->class_dev);
#endif // __VMKERNEL_MODULE__
	}

	list_del(&device->global_node);

	dma_chans_rebalance();
}

/*
 * Workqueue created in dma_bus_init().  In this file work is only queued on
 * it from the dma_wait_for_completion() code that is compiled out with #if 0.
 */
static struct workqueue_struct *dma_wait_wq;
/* NOTE(review): not referenced anywhere else in the visible source. */
static LIST_HEAD(dma_poll_list);

/*
 * Wait until the operation identified by @cookie on @chan is no longer in
 * progress.
 *
 * Polls dma_async_memcpy_complete() and calls schedule() between polls for
 * as long as the status is DMA_IN_PROGRESS.
 *
 * Returns the final status reported by the device rather than an
 * unconditional DMA_SUCCESS, so a status other than success that ends the
 * wait is passed on to the caller.
 */
enum dma_status_t
dma_async_wait_for_completion(struct dma_chan *chan, dma_cookie_t cookie)
{
	enum dma_status_t status;

	while ((status = dma_async_memcpy_complete(chan, cookie, NULL, NULL)) == DMA_IN_PROGRESS)
		schedule();

	return status;
}

#if 0
/*
 * Work function for the interrupt-driven wait path.  This code is compiled
 * out by the enclosing #if 0.
 *
 * @data: the struct dma_completion describing the channel/cookie to wait on
 *
 * While the operation is still in progress, arms the channel interrupt and
 * sleeps on this CPU's kick_dma_poll completion before checking again; then
 * signals comp->comp.
 *
 * NOTE(review): calls dma_memcpy_complete() with two arguments, a name that
 * is not defined in the visible part of this file -- needs updating before
 * this block can be re-enabled.
 */
static void
dma_poll(void *data)
{
	struct dma_completion *comp = data;

	comp->status = dma_memcpy_complete(comp->chan, comp->cookie);
	while (comp->status == DMA_IN_PROGRESS) {
		comp->chan->device->device_arm_interrupt(comp->chan);
		wait_for_completion(&__get_cpu_var(kick_dma_poll));
		comp->status = dma_memcpy_complete(comp->chan, comp->cookie);
	}
	complete(&comp->comp);
}

/*
 * Sleeping wait for a DMA operation, built on the dma_wait_wq workqueue.
 * This code is compiled out by the enclosing #if 0.
 *
 * Returns at once if the operation is not in progress; otherwise queues
 * dma_poll() on dma_wait_wq, waits for it to signal the on-stack completion
 * and returns the status it recorded.  Must not be called from interrupt
 * context (BUG_ON).
 */
enum dma_status_t
dma_wait_for_completion(struct dma_chan *chan, dma_cookie_t cookie)
{
	enum dma_status_t status;
	DECLARE_DMA_COMPLETION(comp, chan, cookie);
	DECLARE_WORK(dma_wait_work, dma_poll, &comp);

	BUG_ON(in_interrupt());

	/* Fast path: nothing to wait for. */
	status = dma_memcpy_complete(chan, cookie);
	if (status != DMA_IN_PROGRESS)
		return status;

	queue_work(dma_wait_wq, &dma_wait_work);
	wait_for_completion(&comp.comp);
	return comp.status;
}
#endif

/*
 * Subsystem initialisation, run through subsys_initcall().
 *
 * VMkernel build: nothing to set up, returns 0.
 *
 * Otherwise: creates the "dmapoll" workqueue, initialises the per-CPU
 * kick_dma_poll completion of every online CPU, allocates and zeroes the
 * profiling state in INSTRUMENTATION builds, and registers the "dma" class.
 *
 * Returns the result of class_register(), or -ENOMEM when the profiling
 * state cannot be allocated (the allocation result used to be passed to
 * memset() unchecked).
 */
static int __init dma_bus_init(void)
{
#ifdef __VMKERNEL_MODULE__
        return 0;
#else
	int cpu;

	/*
	 * NOTE(review): the result is not checked, as before; within this
	 * file the workqueue is only used by code that is compiled out.
	 */
	dma_wait_wq = create_workqueue("dmapoll");
	for_each_online_cpu(cpu) {
		init_completion(&per_cpu(kick_dma_poll, cpu));
	}
#ifdef INSTRUMENTATION
	if (ioat_perf_instr == NULL) {
		ioat_perf_instr = vmalloc(sizeof(struct ioat_perf));
		/* vmalloc() can fail; never memset() a NULL pointer. */
		if (ioat_perf_instr == NULL)
			return -ENOMEM;
		memset(ioat_perf_instr, 0, sizeof(struct ioat_perf));
	}
#endif
	return class_register(&dma_devclass);
#endif // __VMKERNEL_MODULE__
}

/* Run dma_bus_init() at subsys initcall time. */
subsys_initcall(dma_bus_init);

/* Client-side API. */
EXPORT_SYMBOL(dma_async_client_register);
EXPORT_SYMBOL(dma_async_client_unregister);
EXPORT_SYMBOL(dma_async_client_chan_request);
/*
 * Copy submission and completion API.
 * NOTE(review): dma_async_memcpy_mach_buf_to_buf() (VMkernel build) is not
 * exported here -- confirm whether that is intentional.
 */
EXPORT_SYMBOL(dma_async_memcpy_buf_to_buf);
EXPORT_SYMBOL(dma_async_memcpy_buf_to_pg);
EXPORT_SYMBOL(dma_async_memcpy_pg_to_pg);
EXPORT_SYMBOL(dma_async_memcpy_complete);
EXPORT_SYMBOL(dma_async_memcpy_issue_pending);
/* Device-side API. */
EXPORT_SYMBOL(dma_async_device_register);
EXPORT_SYMBOL(dma_async_device_unregister);
EXPORT_SYMBOL(dma_async_wait_for_completion);
#ifndef __VMKERNEL_MODULE__
/* The per-CPU completion is only defined outside the VMkernel build. */
EXPORT_PER_CPU_SYMBOL(kick_dma_poll);
#endif // __VMKERNEL_MODULE__


/*
 * When CONFIG_INTEL_CB_DMA_SIM is defined, sys_sched_setscheduler is
 * exported from here.  NOTE(review): presumably needed by a DMA simulator
 * module -- confirm who uses it.
 */
#ifdef CONFIG_INTEL_CB_DMA_SIM
#include <linux/syscalls.h>
EXPORT_SYMBOL(sys_sched_setscheduler);
#endif

#ifdef INSTRUMENTATION

/*
 * Program a pair of MSRs for one performance counter.
 *
 * @configurationCounter:      MSR number written first
 * @configurationCounterValue: low 32 bits written to it (high word is 0)
 * @eventCounter:              MSR number written second
 * @eventCounterValue:         low 32 bits written to it (high word is 0)
 */
void Init_P4_Counter(unsigned int configurationCounter, unsigned int configurationCounterValue, unsigned int eventCounter, unsigned int eventCounterValue)
{
	/* Configuration register first, then the event register. */
	wrmsr(configurationCounter, configurationCounterValue, 0);
	wrmsr(eventCounter, eventCounterValue, 0);
}

/*
 * Read an MSR and return its low 32 bits.
 *
 * @counterToRead: MSR number to read
 *
 * The high word delivered by rdmsr() is read but discarded.
 */
unsigned int Read_P4_Counter(unsigned int counterToRead)
{
	unsigned int low_word = 0;
	unsigned int high_word = 0;

	rdmsr(counterToRead, low_word, high_word);

	return low_word;
}

/*
 * Write @counterValue into the low word of MSR @counterToClear; the high
 * word is written as 0.
 */
void Clear_P4_Counter(unsigned int counterToClear, unsigned int counterValue)
{
	wrmsr(counterToClear, counterValue, 0);
}

/*
 * Zero every hardware counter used by the instrumentation code.
 *
 * NOTE: when a new counter is put to use, it has to be cleared here too.
 */
void Reset_Counters(void)
{
	wrmsr(MSR_IQ_COUNTER0, 0, 0);
	wrmsr(MSR_IQ_COUNTER1, 0, 0);
	wrmsr(MSR_BPU_COUNTER0, 0, 0);
}

/*
 * Begin a profiling sample at probe point @Location.
 *
 * Does nothing unless @Location is selected in
 * ioat_perf_instr->prof_location.  Otherwise the two performance counters
 * are programmed and the current TSC is saved as the start of the sample.
 */
void Start_Instrumentation(unsigned int Location)
{
	if (ioat_perf_instr->prof_location & Location) {
		/* Instructions Retired */
		Init_P4_Counter(MSR_IQ_CCCR0, COUNT_INSTR_RET_CCCR0,
				MSR_CRU_ESCR0, INSTR_RET_ESCR0);

		/* L2 misses in general */
		Init_P4_Counter(MSR_BPU_CCCR0, COUNT_BSQ_CACHE_REFERENCE_CR_CCCR0,
				MSR_BSU_ESCR0, BSQ_CACHE_REFERENCE_CR_ESCR0);

		/* Begin TSC */
		rdtscll(ioat_perf_instr->begin_tsc);
	}
}

void End_Instrumentation(unsigned int ForceCount, unsigned int Location)
{
        if (!(ioat_perf_instr->prof_location & Location)) return;

        /* Instructions retired */
        Clear_P4_Counter(MSR_IQ_CCCR0, CLEAR_INSTR_RET_CCCR0);

        /* L2 Misses */
        Clear_P4_Counter(MSR_BPU_CCCR0, CLEAR_BSQ_CACHE_REFERENCE_CR_CCCR0);

        /* Instructions retired */
        ioat_perf_instr->cntr_val1 = Read_P4_Counter(MSR_IQ_COUNTER0);

        /* L2 misses */
        ioat_perf_instr->cntr_val2 = Read_P4_Counter(MSR_BPU_COUNTER0);

        /* End TSC */
        rdtscll(ioat_perf_instr->end_tsc);

        /* TSC delta */
        ioat_perf_instr->tsc_delta += ioat_perf_instr->end_tsc -
                                        ioat_perf_instr->begin_tsc;

        ioat_perf_instr->tsc_count++;

        if (ForceCount) {
                ioat_perf_instr->force_count += ForceCount;
        }
}

/* Exported so that code outside this file can bracket regions to profile. */
EXPORT_SYMBOL(Start_Instrumentation);
EXPORT_SYMBOL(End_Instrumentation);

#endif

