/* Common headers */
#include <sys/time.h>
#include <sys/resource.h>
#include <time.h>
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <string.h>

/* Check if we are a regular Linux x86 system */
#if (! defined (LETSEE_MIPS_TARGET)) && (! defined (LETSEE_TRAVIATA))
# define LETSEE_REGULAR_TARGET
#endif

/* Papi headers */
#ifdef LETSEE_PAPI
# include <papi.h>
#endif
/* Linux x86 headers */
#ifdef LETSEE_REGULAR_TARGET
# include <sched.h>
#endif
/* Mips headers */
#ifdef LETSEE_MIPS_TARGET
# if defined(__mips) && defined(__GNUC__)

# define PERFMON_TIMER_T unsigned long long
# define BILLION  1000000000L

PERFMON_TIMER_T read_cycle_counter()
{
    struct timespec s;
    clock_gettime(CLOCK_REALTIME, &s);
    return (PERFMON_TIMER_T)
    ((PERFMON_TIMER_T)((PERFMON_TIMER_T)(s.tv_sec)*BILLION)+s.tv_nsec);
}
# endif
#endif

/* Arithmetic helpers.
 *   ceild / floord : divide in double precision, then round the quotient up
 *                    or down; the result is a double.
 *   max / min      : conditional-expression macros; the selected argument is
 *                    evaluated twice, so avoid arguments with side effects. */
#define ceild(num,den)  ceil((double)(num) / (double)(den))
#define floord(num,den) floor((double)(num) / (double)(den))
#define max(a,b)        ((a) > (b) ? (a) : (b))
#define min(a,b)        ((a) < (b) ? (a) : (b))

/*****************************************************************************/
/*****************************************************************************/

/* Benchmark-wide definitions.
 *   LD_CACHE_SIZE  : number of long double elements in the buffer that is
 *                    written before the measurement to disturb the cache.
 *   FLOAT_TYPE / INT_TYPE         : element types of the work arrays.
 *   FLOAT_MODIFIER / INT_MODIFIER : matching printf formats (note the
 *                    trailing space used as a separator).
 *   ROUND(f)       : round to the nearest int, halves away from zero. */
#define LD_CACHE_SIZE 150000
#define FLOAT_TYPE float
#define INT_TYPE int
#define FLOAT_MODIFIER "%f "
#define INT_MODIFIER "%d "
#define ROUND(f) ((int)(((f) < 0.0) ? ((f) - 0.5) : ((f) + 0.5)))

/* Kernel statements.  They operate on the arrays `block`, `cos1`, `temp2d`
 * and the scalar `sum2` that must be visible at the point of use.
 *   S1 : clear one element of temp2d.
 *   S2 : accumulate block[r][t] * cos1[c][t] into temp2d[r][c].
 *   S3 : clear the scalar accumulator.
 *   S4 : accumulate cos1[r][t] * temp2d[t][c] into the scalar accumulator.
 *   S5 : store the rounded accumulator back into block[r][c]. */
#define S1(r,c)   temp2d[r][c] = 0.0
#define S2(r,c,t) temp2d[r][c] += block[r][t] * cos1[c][t]
#define S3(r,c)   sum2 = 0.0
#define S4(r,c,t) sum2 += cos1[r][t] * temp2d[t][c]
#define S5(r,c)   block[r][c] = ROUND(sum2)

/*****************************************************************************/
/*****************************************************************************/

#ifdef LETSEE_PAPI
/*
 * Report a failed (or skipped) PAPI call on stdout, shut PAPI down when it
 * has been initialized, and terminate the process with exit status 1.
 *
 *   file   - name of the source file issuing the call (printed first)
 *   line   - line number of the call
 *   call   - name of the call / message to report
 *   retval - code returned by the call: 0 is reported as SKIPPED, anything
 *            else as FAILED; PAPI_ESYS additionally goes through perror(),
 *            other negative codes are translated with PAPI_strerror().
 *
 * This function never returns.
 */
void test_fail(const char *file, int line, const char *call, int retval)
{
   char buf[128];

   if (retval != 0)
      fprintf(stdout,"%-40s FAILED\nLine # %d\n", file, line);
   else {
      fprintf(stdout,"%-40s SKIPPED\n", file);
      fprintf(stdout,"Line # %d\n", line);
   }
   if (retval == PAPI_ESYS) {
      /* Bounded formatting: `call` is caller supplied and may be long. */
      snprintf(buf, sizeof(buf), "System error in %s", call);
      perror(buf);
   } else if (retval >= 0) {
      /* Not a PAPI error code: `call` itself carries the message. */
      fprintf(stdout,"Error: %s\n", call);
   } else {
      /* PAPI_strerror() has the same one-argument form in legacy and current
         PAPI releases, unlike PAPI_perror(). */
      const char *errstring = PAPI_strerror(retval);
      fprintf(stdout,"Error in %s: %s\n", call,
              errstring != NULL ? errstring : "unknown PAPI error");
   }
   fprintf(stdout,"\n");
   if ( PAPI_is_initialized() ) PAPI_shutdown();
   exit(1);
}
#endif

/*
 * Benchmark driver: fills the work arrays, runs the S1..S5 kernel under a
 * timer, prints the measurement on stdout and writes every element of
 * `block` to the file "<argv[0]>.output".
 *
 * Build-time configuration (preprocessor symbols):
 *   PARVAL1      - problem size; the kernel iterates over [0, PARVAL1) and
 *                  every array holds PARVAL1 + 1 entries per dimension.
 *   test_malloc  - allocate the arrays row by row on the heap instead of
 *                  declaring them as local arrays.
 *   LETSEE_PAPI  - run the kernel once per PAPI event of `eventlist`, print
 *                  the average PAPI real-cycle count and each event value.
 *                  The target timers below are not used in this mode.
 *   LETSEE_MIPS_TARGET    - time with read_cycle_counter().
 *   LETSEE_TRAVIATA       - time with gettimeofday(), printed in microseconds.
 *   LETSEE_REGULAR_TARGET - time with the x86 RDTSC instruction.
 *
 * Returns 0 on success, 1 when an allocation fails or the output file
 * cannot be named, opened or closed.
 */
int main(int argc, char **argv)
{
#ifdef LETSEE_PAPI
  int retval;
  int EventSet = PAPI_NULL;
  long_long values[1];
  /* Zero-filled so that events which could not be added print as 0. */
  long_long all_values[32] = {0};
  char descr[PAPI_MAX_STR_LEN];
  PAPI_event_info_t evinfo;
  long double* cache_cleaner =
    (long double*) malloc (LD_CACHE_SIZE * sizeof (long double));
  int cache_iter;

  if (cache_cleaner == NULL) {
    fprintf(stderr, "Error: cannot allocate the cache cleaner buffer\n");
    return 1;
  }

  for (cache_iter = 0; cache_iter < LD_CACHE_SIZE; ++cache_iter)
    cache_cleaner[cache_iter] = M_PI * cache_iter;

  /* Events measured one at a time; the list is terminated by 0. */
  const unsigned int eventlist[] = {
    PAPI_L1_ICA,
    PAPI_L1_ICM,
    PAPI_L2_TCH,
    PAPI_L2_TCM,
    PAPI_TLB_DM,
    PAPI_TLB_IM,
    PAPI_LST_INS,
    PAPI_BR_MSP,
    PAPI_FP_OPS,
    0
  };

  if ((retval = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT)
    test_fail(__FILE__, __LINE__, "PAPI_library_init", retval);

  if ((retval = PAPI_create_eventset(&EventSet)) != PAPI_OK)
    test_fail(__FILE__, __LINE__, "PAPI_create_eventset", retval);
#endif

#ifdef LETSEE_REGULAR_TARGET
  /* Use FIFO scheduler to limit OS interference.  Best effort only: the
     result is deliberately ignored and the run continues either way. */
  struct sched_param schedParam;
  schedParam.sched_priority = 99;
  (void) sched_setscheduler(0, SCHED_FIFO, &schedParam);
#endif

  /***************************************************************************/
  /***************************************************************************/

  /* Prologue */

  // initialize data.
  FLOAT_TYPE sum2 = 42;
#ifdef test_malloc
  /* Array declaration: tables of row pointers, rows are allocated below. */
  INT_TYPE** block = (INT_TYPE**) malloc((PARVAL1+1)*sizeof(INT_TYPE*));
  FLOAT_TYPE** cos1 = (FLOAT_TYPE**) malloc((PARVAL1+1)*sizeof(FLOAT_TYPE*));
  FLOAT_TYPE** cos2 = (FLOAT_TYPE**) malloc((PARVAL1+1)*sizeof(FLOAT_TYPE*));
  FLOAT_TYPE** temp2d = (FLOAT_TYPE**) malloc((PARVAL1+1)*sizeof(FLOAT_TYPE*));

  if (block == NULL || cos1 == NULL || cos2 == NULL || temp2d == NULL) {
    fprintf(stderr, "Error: cannot allocate the array row tables\n");
    return 1;
  }
#else
  INT_TYPE block[PARVAL1 + 1][PARVAL1 + 1];
  FLOAT_TYPE cos1[PARVAL1 + 1][PARVAL1 + 1];
  FLOAT_TYPE cos2[PARVAL1 + 1][PARVAL1 + 1];
  FLOAT_TYPE temp2d[PARVAL1 + 1][PARVAL1 + 1];
#endif

  /* Scratch buffer written once before the measurement ("cache clearing").
     NOTE(review): nothing ever reads it back, so an optimizing compiler may
     be entitled to drop the writes - confirm with the flags actually used. */
  long double* cache;
  cache = (long double*) malloc(LD_CACHE_SIZE * sizeof(long double));
  if (cache == NULL) {
    fprintf(stderr, "Error: cannot allocate the cache buffer\n");
    return 1;
  }

  /* Array initialization. */
  unsigned iarray, iarray2;
  for (iarray = 0; iarray <= PARVAL1; ++iarray) {
#ifdef test_malloc
    block[iarray] = (INT_TYPE*) malloc((PARVAL1 + 1) * sizeof(INT_TYPE));
    cos1[iarray] = (FLOAT_TYPE*) malloc((PARVAL1 + 1) * sizeof(FLOAT_TYPE));
    cos2[iarray] = (FLOAT_TYPE*) malloc((PARVAL1 + 1) * sizeof(FLOAT_TYPE));
    temp2d[iarray] = (FLOAT_TYPE*) malloc((PARVAL1 + 1) * sizeof(FLOAT_TYPE));
    if (block[iarray] == NULL || cos1[iarray] == NULL ||
        cos2[iarray] == NULL || temp2d[iarray] == NULL) {
      fprintf(stderr, "Error: cannot allocate array row %u\n", iarray);
      return 1;
    }
#endif
    for (iarray2 = 0; iarray2 <= PARVAL1; ++iarray2) {
      block[iarray][iarray2] = M_PI * iarray + iarray2;
      cos1[iarray][iarray2] = M_PI * iarray2 + iarray;
      temp2d[iarray][iarray2] = M_PI + iarray;
    }
  }
  /* cos2 is filled through swapped indices (second index varies slowest). */
  for (iarray = 0; iarray <= PARVAL1; ++iarray) {
    for (iarray2 = 0; iarray2 <= PARVAL1; ++iarray2) {
      cos2[iarray2][iarray] = M_PI * iarray2 + iarray;
    }
  }

  /* Clear the cache */
  for (iarray = 0; iarray < LD_CACHE_SIZE; ++iarray)
    cache[iarray] = M_PI * iarray;

  /***************************************************************************/
  /***************************************************************************/


#ifdef LETSEE_PAPI
  long_long cycles_start, cycles_stop, cycles_avg = 0;
  int evid, eviditer;
  /* Number of events actually measured; skipped events must not dilute the
     cycle average. */
  int measured = 0;

  for (evid = 0; eventlist[evid] != 0; evid++)
    {
      PAPI_event_code_to_name(eventlist[evid], descr);
      /* Events that cannot be added to the set are silently skipped. */
      if (PAPI_add_event(EventSet, eventlist[evid]) != PAPI_OK)
        continue;

      // Clean the cache at each iteration.
      for (cache_iter = 0; cache_iter < LD_CACHE_SIZE; ++cache_iter)
        cache_cleaner[cache_iter] *= M_PI + cache_iter;


      if ((retval = PAPI_get_event_info(eventlist[evid], &evinfo)) != PAPI_OK)
        test_fail(__FILE__, __LINE__, "PAPI_get_event_info", retval);


      if ((retval = PAPI_start(EventSet)) != PAPI_OK)
        test_fail(__FILE__, __LINE__, "PAPI_start", retval);

      /* Start the counter.  The timer variables must live outside the loop
         body, so PAPI's own real-cycle counter is used in this mode. */
      cycles_start = PAPI_get_real_cyc();
#else
      /* Start the counter */
# ifdef LETSEE_MIPS_TARGET
      PERFMON_TIMER_T cycles_start, cycles_stop;
      cycles_start = read_cycle_counter();
# endif
# ifdef LETSEE_TRAVIATA
      struct timeval tv;
      struct timezone tz;
      gettimeofday (&tv, &tz);
      double time_start = (double) tv.tv_sec;
      time_start += (double) tv.tv_usec * 1.0e-6;
# endif
# ifdef LETSEE_REGULAR_TARGET
      unsigned long long int cycle_start, cycle_stop;
      unsigned int tsc_lo, tsc_hi;
      /* RDTSC returns the counter in EDX:EAX.  The halves are read
         separately because the "A" constraint only denotes that register
         pair on 32-bit x86, not on x86-64. */
      __asm__ volatile ("rdtsc" : "=a" (tsc_lo), "=d" (tsc_hi));
      cycle_start = ((unsigned long long int) tsc_hi << 32) | tsc_lo;
# endif
#endif

      /***********************************************************************/
      /***********************************************************************/

      /*  Kernel */

      /* Original iterators. */
      int i, j, k;
      /* Parameters. */
      int M = PARVAL1;

      /* First pass: temp2d[i][j] = sum over k of block[i][k] * cos1[j][k]. */
      for (i = 0; i < M; i++) {
        for (j = 0; j < M; j++) {
          S1(i,j);
          for (k = 0; k < M; k++) {
            S2(i,j,k);
          }
        }
      }
      /* Second pass: block[i][j] = ROUND(sum over k of
         cos1[i][k] * temp2d[k][j]). */
      for (i = 0; i < M; i++) {
        for (j = 0; j < M; j++) {
          S3(i,j);
          for (k = 0; k < M; k++) {
            S4(i,j,k);
          }
          S5(i,j);
        }
      }

      /***********************************************************************/
      /***********************************************************************/

#ifdef LETSEE_PAPI
      cycles_stop = PAPI_get_real_cyc();

      if ((retval = PAPI_read(EventSet, &values[0])) != PAPI_OK)
        test_fail(__FILE__, __LINE__, "PAPI_read", retval);

      if ((retval = PAPI_stop(EventSet,NULL)) != PAPI_OK)
        test_fail(__FILE__, __LINE__, "PAPI_stop", retval);

      cycles_avg += cycles_stop - cycles_start;
      all_values[evid] = values[0];
      ++measured;

      if ((retval = PAPI_remove_event(EventSet, eventlist[evid])) != PAPI_OK)
        test_fail(__FILE__, __LINE__, "PAPI_remove_event", retval);
    }

  if ((retval = PAPI_destroy_eventset(&EventSet)) != PAPI_OK)
    test_fail(__FILE__, __LINE__, "PAPI_destroy_eventset", retval);

  // Output measure results.
  printf("Cycles: %lld\n", measured > 0 ? cycles_avg / measured : 0LL);
  printf ("Values: ");
  for (eviditer = 0; eviditer < evid; ++eviditer)
    printf ("%lld ", all_values[eviditer]);
  printf ("\n");
#else
# ifdef LETSEE_MIPS_TARGET
  cycles_stop = read_cycle_counter();
  printf("Cycles: %llu\n", cycles_stop - cycles_start);
# endif
# ifdef LETSEE_TRAVIATA
  gettimeofday (&tv, &tz);
  double time_stop = (double) tv.tv_sec;
  time_stop += (double) tv.tv_usec * 1.0e-6;
  printf ("Cycles: %d\n", (int)((time_stop - time_start) * 1.0e+06));
# endif
# ifdef LETSEE_REGULAR_TARGET
  __asm__ volatile ("rdtsc" : "=a" (tsc_lo), "=d" (tsc_hi));
  cycle_stop = ((unsigned long long int) tsc_hi << 32) | tsc_lo;
  printf("Cycles: %llu\n", cycle_stop - cycle_start);
# endif
#endif

  /***********************************************************************/
  /***********************************************************************/

  /* Epilogue */

  char end_line = 10;
  char buf[512];

  if (argc < 1 || argv[0] == NULL) {
    fprintf(stderr, "Error: no program name to derive the output file from\n");
    return 1;
  }

  /* Output file is "<argv[0]>.output"; refuse to truncate the name rather
     than overflow the buffer or write to an unexpected file. */
  int name_len = snprintf(buf, sizeof(buf), "%s.output", argv[0]);
  if (name_len < 0 || (size_t) name_len >= sizeof(buf)) {
    fprintf(stderr, "Error: output file name is too long\n");
    return 1;
  }

  FILE* o_file = fopen(buf, "w");
  if (o_file == NULL) {
    perror(buf);
    return 1;
  }
  for (iarray = 0; iarray <= PARVAL1; ++iarray) {
    for (iarray2 = 0; iarray2 <= PARVAL1; ++iarray2) {
      fprintf(o_file, INT_MODIFIER, block[iarray][iarray2]);
    }
  }
  fprintf(o_file, "%c", end_line);
  /* fclose flushes the stream, so a failure here means lost output. */
  if (fclose(o_file) != 0) {
    perror(buf);
    return 1;
  }
#ifdef test_malloc
  for (iarray = 0; iarray <= PARVAL1; ++iarray) {
    free(block[iarray]);
    free(cos1[iarray]);
    free(cos2[iarray]);
    free(temp2d[iarray]);
  }
  free(block);
  free(cos1);
  free(cos2);
  free(temp2d);
#endif
  free(cache);
#ifdef LETSEE_PAPI
  free(cache_cleaner);
#endif

  /***********************************************************************/
  /***********************************************************************/

  return 0;
}

