/* threaded program to compute y = a*x; */
/* assumes a square matrix.. and assignes x[i] = i to be the vector */

#include <stdio.h>
#include <pthread.h>
#include <math.h>
#include <sys/time.h>

#define MAX_THREADS 256

void SparseMatVec();
void *matvec_thread();


struct SparseMatrix {
	int numrows;
	int numcols;
	int numentries;
	int *rowptr;
	int *colind;
	double *values;
} a;

struct vector {
	int length;
	double *values;
} x, y;

int nlocal;

main()
{
	FILE *fp;
	char infile[32];
	int i, num_threads;
	pthread_t threads[MAX_THREADS];
	pthread_attr_t  attr;
	hrtime_t start_time, end_time;
	int thread_ids[MAX_THREADS];

	printf("Enter Input File: ");
	scanf("%s", infile);

	printf("Enter Number of Threads: ");
	scanf("%d", &num_threads);

	if (!(fp = fopen(infile,"r")))
	{
		printf("Invalid input file.\n");
		exit(0);
	}

	pthread_attr_init (&attr);
	pthread_attr_setscope (&attr,PTHREAD_SCOPE_SYSTEM);

	ReadSparseMatrixAndVector(fp, &a, &x);

	printf("Read in data ...\n");
	fflush(stdout);

	y.length = a.numrows;
	y.values = (double *) malloc (a.numrows * sizeof(double));

	if (a.numrows % num_threads == 0)
		nlocal = a.numrows / num_threads;
	else
		nlocal = (int)((float)((float)a.numrows
			/(float)num_threads + (float)1.));

	printf("***** %d\n", nlocal);

	start_time = gethrtime();
	for (i=0; i< num_threads; i++)
	{
		thread_ids[i]=i;
		pthread_create(&threads[i], &attr,
			matvec_thread, (void *)(&thread_ids[i]));
	}

	for (i=0; i< num_threads; i++)
		pthread_join(threads[i], NULL);
	end_time = gethrtime();

/*
	for (i=0; i< y.length; i++)
		printf("%lf ", y.values[i]);
	printf("\n");
*/
	printf("Execution time = %lld ms\n", (end_time - start_time)/1000);
}

void *matvec_thread (void *ptr)
{
	int  thread_id, i;
	thread_id = *((int *)ptr);
	for (i = 0; i < 1; i++)
		SparseMatVec(nlocal, a, x, &y, thread_id);
}
	

ReadSparseMatrixAndVector(FILE *fp, struct SparseMatrix *inp, struct vector *x)
{
	int i, temp_entry;
	fscanf(fp, "%d", &(inp -> numrows));
	fscanf(fp, "%d", &(inp -> numentries));
	inp -> numentries = 2 * inp -> numentries;
	printf("** numentries = %d\n", inp -> numentries);
	inp -> numcols = inp -> numrows;
	inp -> rowptr = (int *) malloc ((inp -> numrows + 1)*sizeof(int));

	for (i=0;i< inp -> numrows; i++)
		fscanf(fp, "%d", &(inp -> rowptr[i]));
	inp -> rowptr[i] = inp -> numentries;


	inp -> colind = (int *) malloc (inp -> rowptr[inp -> numrows] *
					sizeof(int));

	for (i = 0; i < inp -> rowptr[inp -> numrows]; i++)
		fscanf(fp, "%d", &(inp -> colind[i]));

	inp -> values = (double *) malloc (inp -> rowptr[inp -> numrows] *
                                        sizeof(double));

	for (i = 0; i < inp -> rowptr[inp -> numrows]; i++)
	{
		fscanf(fp, "%d", &temp_entry);
		inp -> values[i] = (float) temp_entry;
/*
                fscanf(fp, "%lf", &(inp -> values[i]));
*/
	}

	x -> length = inp -> numcols;

	x -> values = (double *) malloc (x -> length * sizeof(double));
	for (i = 0; i < inp -> numcols; i++)
/*
		fscanf(fp, "%lf", &(x -> values[i]));
*/
		x -> values[i] = (float) (i+1);

}

void SparseMatVec(int nlocal, struct SparseMatrix a, struct vector x,
struct vector *y, int thread_id)
{
/* each thread gets nlocal rows;
   the matrix is stored in global address space in compressed row
   storage in arrays: rowptr, colind, values.
   the vector is stored in global address space array x */

	int start_row, end_row, x1, x2, *y1, *col, *scol, i, j;
	double *t1, *y2, *vals, *avals, *savals, temp;

	start_row = thread_id * nlocal;
	end_row = start_row + nlocal;

	if (end_row > a.numrows)
		end_row = a.numrows;

	if (end_row <= start_row)
		return;

	y1 = &(a.rowptr[start_row]);
	y2 = &(y -> values[start_row]);

	vals = &(x.values[0]);
	savals = &(a.values[0]);
	scol = &(a.colind[0]);

	for (i=start_row; i< end_row; i++)
	{
		temp = 0.0;
		x1 = *y1++;
		x2 = *y1;
		col = scol + x1;
		avals = savals + x1;
		for (j = x1; j < x2 ; j++)
			temp += *(avals++) * vals[*(col++)];
		*(y2++) = temp;
	}
}
	


