/* matmulacc.c */
#include <errno.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <gsl/gsl_blas.h>

/*
 * Right-multiply z by m nTimes (z <- z * m on every pass) and print the
 * first element of the result so the work cannot be optimised away.
 *
 * z      - view of the accumulator matrix; the storage it refers to is
 *          overwritten with the final product.
 * m      - view of the multiplier matrix (must be square and conformant
 *          with z so the product fits back into z).
 * nTimes - number of multiplications; values <= 0 perform none.
 *
 * dgemm must not be given an output matrix that overlaps one of its
 * inputs, so the products are written alternately into a scratch matrix
 * and into z instead of multiplying in place.
 */
void run_mults(gsl_matrix_view z, gsl_matrix_view m, int nTimes) {
   gsl_matrix *src = &z.matrix;   /* holds the current accumulated product */
   gsl_matrix *dst = NULL;        /* receives the next product */
   gsl_matrix *scratch = NULL;
   int i;                         /* signed: a negative nTimes means no passes */

   if (nTimes > 0) {
      scratch = gsl_matrix_alloc(z.matrix.size1, z.matrix.size2);
      if (scratch == NULL) {
         fprintf(stderr, "run_mults: unable to allocate scratch matrix\n");
         return;
      }
      dst = scratch;
   }

   for (i = 0; i < nTimes; ++i) {
      gsl_matrix *swap;

      /* beta == 0.0, so the previous contents of dst are ignored */
      gsl_blas_dgemm(CblasNoTrans, CblasNoTrans,
         1.0, src, &m.matrix, 0.0, dst);

      swap = src;
      src = dst;
      dst = swap;
   }

   /* After an odd number of passes the result lives in the scratch matrix */
   if (src != &z.matrix)
      gsl_matrix_memcpy(&z.matrix, src);

   if (scratch != NULL)
      gsl_matrix_free(scratch);

   printf("%lf\n", z.matrix.data[0]);
}

int main(int narg, char **arg) {
   unsigned int i;                     /* general purpose counter */
   struct timespec start_ts, end_ts;   /* structs needed by clock_gettime */
   double start_time, end_time;        /* actual start and stop times in secs */
   double elapsed_time;  /* elapsed time in secs */

   int nTimes = atoi(arg[1]);
   int nRows = atoi(arg[2]);

   double *m_data = malloc(sizeof(double)*nRows*nRows);
   double *z_data = malloc(sizeof(double)*nRows*nRows); 

   srand((unsigned int)time(NULL));

   for (i = 0; i < nRows*nRows; ++i)
      m_data[i] = 0.001 * ((double)rand() / RAND_MAX);

   memcpy(z_data, m_data, sizeof(double)*nRows*nRows);

   /* build matrices */
   gsl_matrix_view z = gsl_matrix_view_array(z_data, nRows, nRows);
   gsl_matrix_view m = gsl_matrix_view_array(m_data, nRows, nRows);

   clock_gettime(CLOCK_REALTIME, &start_ts);
   run_mults(z, m, nTimes);
   clock_gettime(CLOCK_REALTIME, &end_ts); 

   start_time = (double)start_ts.tv_sec + ((double)start_ts.tv_nsec)*0.000000001;
   end_time   = (double)end_ts.tv_sec   + ((double)end_ts.tv_nsec)*0.000000001;
   elapsed_time = end_time - start_time;
   printf("------------------------------------------------------C-- %d %d %lf\n", nTimes, nRows, elapsed_time);
}
