#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/* Matrix dimension: a, b and c are all DIM x DIM. */
#define DIM     512
/* Side length of the square block of c that a single thread computes. */
#define BLOCK   256

/* therefore (DIM/BLOCK) x (DIM/BLOCK) = 2 x 2 = 4 threads in all */

/* Dot product of one row of a with one column of b. */
float dot_product();
/* Operand matrices a and b, and the result matrix c. */
float a[DIM][DIM], b[DIM][DIM], c[DIM][DIM];
/* Block index handed to each thread; allocated in main, one entry per thread. */
int *thread_structure;
/* Thread entry point: computes one BLOCK x BLOCK block of c. */
void *compute_block_of_result (void *);

/*
 * Fill a and b with pseudo-random values, compute c = a * b using one
 * thread per BLOCK x BLOCK block of c, and print the time consumed by the
 * multiplication.
 *
 * Returns 0 on success, and 1 if memory allocation, attribute
 * initialisation or thread creation fails.
 */
int main(void)
{
        /* One worker thread per block of the result matrix. */
        const int num_threads = (DIM * DIM) / (BLOCK * BLOCK);
        int i, j;
        int created = 0;
        int status = 0;
        int rc;
        clock_t t1, t2;
        pthread_t *block_threads;
        pthread_attr_t attr;

        block_threads = malloc((size_t) num_threads * sizeof *block_threads);
        thread_structure = malloc((size_t) num_threads * sizeof *thread_structure);
        if (block_threads == NULL || thread_structure == NULL)
        {
                fprintf(stderr, "out of memory\n");
                free(block_threads);
                free(thread_structure);
                thread_structure = NULL;
                return 1;
        }

        rc = pthread_attr_init(&attr);
        if (rc != 0)
        {
                fprintf(stderr, "pthread_attr_init failed (error %d)\n", rc);
                free(block_threads);
                free(thread_structure);
                thread_structure = NULL;
                return 1;
        }

        /* Best effort: if system scope is unsupported the default is kept. */
        (void) pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM);

        for (i = 0; i < DIM; i++)
                for (j = 0; j < DIM; j++)
                {
                        a[i][j] = (rand() % 256) / 100.0 + 1.5;
                        b[i][j] = (rand() % 256) / 100.0 + 1.5;
                }

        t1 = clock();

        for (i = 0; i < num_threads; i++)
        {
                /*
                 * Each thread gets a pointer to its own slot, so the value
                 * it reads cannot change while the loop keeps running.
                 */
                thread_structure[i] = i;
                rc = pthread_create(&block_threads[i], &attr,
                                compute_block_of_result,
                                (void *) &thread_structure[i]);
                if (rc != 0)
                {
                        fprintf(stderr,
                                "pthread_create failed for block %d (error %d)\n",
                                i, rc);
                        status = 1;
                        break;
                }
                created++;
        }

        /* Join only the threads that were actually started. */
        for (i = 0; i < created; i++)
                pthread_join(block_threads[i], NULL);

        t2 = clock();

        /*
         * clock() reports processor time, not wall-clock time; the interval
         * t2 - t1 is converted to seconds with CLOCKS_PER_SEC.
         */
        if (status == 0)
                printf("elapsed time %f\n", (double) (t2 - t1) / CLOCKS_PER_SEC);

        /* Debugging aid kept from the original: enable to dump the result. */
/*

        for (i=0; i< DIM; i++)
        {
                for (j=0; j<DIM; j++)
                        printf("%6.2f ", c[i][j]);
                printf("\n");
        }
*/

        pthread_attr_destroy(&attr);
        free(block_threads);
        free(thread_structure);
        thread_structure = NULL;

        return status;
}


/*
 * Thread entry point: compute one BLOCK x BLOCK block of c = a * b.
 *
 * ptr points to an int holding the block index. Blocks are numbered
 * row-major over a (DIM/BLOCK) x (DIM/BLOCK) grid, so the index is split
 * into a block row and a block column and scaled to element coordinates.
 *
 * Always returns NULL.
 */
void *compute_block_of_result(void *ptr)
{
        const int block_index = *((int *) ptr);
        const int blocks_per_side = DIM / BLOCK;
        /* Top-left element of this thread's block. */
        const int row = (block_index / blocks_per_side) * BLOCK;
        const int col = (block_index % blocks_per_side) * BLOCK;
        int i, j, k;
        float sum;

        for (i = row; i < row + BLOCK; i++)
                for (j = col; j < col + BLOCK; j++)
                {
                        /* c[i][j] is the dot product of row i of a and column j of b. */
                        sum = 0.0;
                        for (k = 0; k < DIM; k++)
                                sum = sum + a[i][k] * b[k][j];
                        c[i][j] = sum;
                }

        /* The original fell off the end of a non-void function. */
        return NULL;
}


/*
 * Return the dot product of row `row` of a with column `col` of b,
 * accumulated in single precision over all DIM terms.
 *
 * Both indices are used unchecked to subscript the DIM x DIM global
 * matrices, so the caller must keep them within 0 .. DIM-1.
 */
float dot_product(int row, int col)
{
        float prod;
        int i;

        prod = 0.0;

        for (i = 0; i < DIM; i++)
                prod += a[row][i] * b[i][col];

        return prod;
}


