#include <stdlib.h>
#include <stdio.h>
#ifdef LETSEE_PAPI
# include <papi.h>
#endif
#include <math.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <time.h>
#include <sched.h>



#include <string.h>
/* Number of long double elements in the buffer main() fills to clear the cache. */
#define LD_CACHE_SIZE 1500000
/* Element type of the a and b arrays used by the kernel. */
#define FLOAT_TYPE double
/* printf-style format used to write one array element to the output file. */
#define FLOAT_MODIFIER "%f "


/* Useful macros. */
/* Ceiling and floor of n/d; the division is carried out in double precision. */
#define ceild(n,d)  ceil(((double)(n))/((double)(d)))
#define floord(n,d) floor(((double)(n))/((double)(d)))
/* NOTE: the selected argument is evaluated twice; avoid arguments with side effects. */
#define max(x,y)    ((x) > (y)? (x) : (y))
#define min(x,y)    ((x) < (y)? (x) : (y))


/* Kernel statements. They expand in place and expect arrays named a and b
   to be in scope at the point of use. */
/* S1: reset a[i] to zero. */
#define S1(i) { a[i] = 0; }
/* S2: replace b[j] with half of (b[j] - a[i]). */
#define S2(i,j) { b[j] = (b[j] - a[i]) / 2; }



#ifdef LETSEE_PAPI
/*
 * Report a failed (retval != 0) or skipped (retval == 0) call on stdout,
 * shut PAPI down if it was initialized, and terminate with exit status 1.
 *
 *   file   - source file name of the call site (normally __FILE__)
 *   line   - line number of the call site (normally __LINE__)
 *   call   - name of the call that failed, used in the message
 *   retval - value returned by that call
 *
 * This function never returns.
 */
void test_fail(char *file, int line, char *call, int retval)
{
   char buf[128];

   memset(buf, '\0', sizeof(buf));
   if (retval != 0)
      fprintf(stdout,"%-40s FAILED\nLine # %d\n", file, line);
   else {
      fprintf(stdout,"%-40s SKIPPED\n", file);
      fprintf(stdout,"Line # %d\n", line);
   }
   if (retval == PAPI_ESYS) {
      /* Bounded formatting: a long `call` string must not overflow buf. */
      snprintf(buf, sizeof(buf), "System error in %s", call);
      perror(buf);
   } else if (retval >= 0) {
      /* Zero and positive values carry no PAPI error text: print the call only. */
      fprintf(stdout,"Error: %s\n", call);
   } else {
      /* Start from an empty string so the message below is well defined
         even if PAPI_perror leaves the buffer untouched. */
      char errstring[PAPI_MAX_STR_LEN] = "";
      PAPI_perror(retval, errstring, PAPI_MAX_STR_LEN);
      fprintf(stdout,"Error in %s: %s\n", call, errstring);
   }
   fprintf(stdout,"\n");
   if ( PAPI_is_initialized() ) PAPI_shutdown();
   exit(1);
}
#endif

/*
 * Read the x86 time-stamp counter.
 *
 * RDTSC returns the low 32 bits in EAX and the high 32 bits in EDX. The two
 * halves are read through separate "=a"/"=d" outputs and combined by hand:
 * the "=A" constraint only names the EDX:EAX pair on 32-bit x86, and yields
 * a wrong value when the code is built for x86-64.
 */
static inline unsigned long long read_tsc(void)
{
  unsigned int lo, hi;

  __asm__ volatile ("RDTSC" : "=a" (lo), "=d" (hi));
  return ((unsigned long long) hi << 32) | lo;
}

/*
 * Benchmark driver.
 *
 * Initializes the arrays a (PARVAL1 + 1 elements) and b (PARVAL2 + 1
 * elements), touches a large buffer to clear the cache, runs the S1/S2 loop
 * nest between two time-stamp counter reads, prints the elapsed cycle count
 * on stdout and finally dumps both arrays to "<argv[0]>.output".
 *
 * PARVAL1 and PARVAL2 are expected to be supplied by the build.
 * With LETSEE_PAPI defined, the kernel is run once per hardware event in
 * eventlist; the counter value of each event and the average cycle count of
 * the measured runs are printed.
 * With test_malloc defined, a and b live on the heap instead of the stack.
 *
 * Returns 0 on success, 1 if an allocation fails or the output file cannot
 * be written.
 */
int main(int argc, char **argv)
{
#ifdef LETSEE_PAPI
  int retval;
  int EventSet = PAPI_NULL;
  long_long values[1];
  /* Zero-filled so that events skipped in the measurement loop print as 0
     instead of an indeterminate value. */
  long_long all_values[32] = {0};
  char descr[PAPI_MAX_STR_LEN];
  PAPI_event_info_t evinfo;
  long double* cache_cleaner =
    (long double*) malloc (150000 * sizeof (long double));
  int cache_iter;

  if (cache_cleaner == NULL) {
    fprintf(stderr, "Cannot allocate the cache cleaner buffer\n");
    return 1;
  }

  for (cache_iter = 0; cache_iter < 150000; ++cache_iter)
    cache_cleaner[cache_iter] = M_PI * cache_iter;

  /* Events measured one at a time; the list is terminated by 0. */
  const unsigned int eventlist[] = {
    PAPI_L1_ICA,
    PAPI_L1_ICM,
    PAPI_L2_TCH,
    PAPI_L2_TCM,
    PAPI_TLB_DM,
    PAPI_TLB_IM,
    PAPI_LST_INS,
    PAPI_BR_MSP,
    PAPI_FP_OPS,
    0
  };

  if ((retval = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT)
    test_fail(__FILE__, __LINE__, "PAPI_library_init", retval);

  if ((retval = PAPI_create_eventset(&EventSet)) != PAPI_OK)
    test_fail(__FILE__, __LINE__, "PAPI_create_eventset", retval);
#endif

  /* Use FIFO scheduler to limit OS interference. This is best effort: the
     call is allowed to fail and the benchmark then runs under the current
     scheduling policy. */
  struct sched_param schedParam;
  schedParam.sched_priority = 99;
  (void) sched_setscheduler(0, SCHED_FIFO, &schedParam);

  // initialize data.

#ifdef test_malloc
  /* Array declaration. */
  FLOAT_TYPE* a = (FLOAT_TYPE*) malloc((PARVAL1 + 1) * sizeof(FLOAT_TYPE));
  FLOAT_TYPE* b = (FLOAT_TYPE*) malloc((PARVAL2 + 1) * sizeof(FLOAT_TYPE));

  if (a == NULL || b == NULL) {
    fprintf(stderr, "Cannot allocate the data arrays\n");
    free(a);
    free(b);
    return 1;
  }
#else
  FLOAT_TYPE a[PARVAL1 + 1];
  FLOAT_TYPE b[PARVAL2 + 1];
#endif

  /* NOTE(review): this buffer is never freed (it is reclaimed at process
     exit). Freeing it might let an optimizer drop the clearing loop below;
     confirm before changing. */
  long double* cache;
  cache = (long double*) malloc(LD_CACHE_SIZE * sizeof(long double));
  if (cache == NULL) {
    fprintf(stderr, "Cannot allocate the cache clearing buffer\n");
    return 1;
  }


  /* Array initialization. */
  unsigned iarray;
  for (iarray = 0; iarray <= PARVAL1; ++iarray) {
    a[iarray] = M_PI * iarray;
  }
  /* NOTE(review): for iarray == 0 this evaluates 0.0 / 0.0, so under
     IEEE-754 arithmetic b[0] starts as NaN; the product 2352472 * iarray is
     also computed in unsigned arithmetic and wraps for large indices. Left
     unchanged so the generated output stays comparable; confirm whether
     this is intended. */
  for (iarray = 0; iarray <= PARVAL2; ++iarray) {
    b[iarray] = M_PI * iarray * M_PI / (2352472 * iarray) ;
  }

  /* Clear the cache */
  for (iarray = 0; iarray < LD_CACHE_SIZE; ++iarray)
    cache[iarray] = M_PI;




#ifdef LETSEE_PAPI
  unsigned long long cycles_start, cycles_stop, cycles_avg = 0;
  int evid, eviditer;
  /* Number of events that were actually measured (not skipped). */
  unsigned measured = 0;

  for (evid = 0; eventlist[evid] != 0; evid++)
    {
      PAPI_event_code_to_name(eventlist[evid], descr);
      /* Events that cannot be added are skipped; their value stays 0. */
      if (PAPI_add_event(EventSet, eventlist[evid]) != PAPI_OK)
	continue;

      // Clean the cache at each iteration.
      for (cache_iter = 0; cache_iter < 150000; ++cache_iter)
	cache_cleaner[cache_iter] *= M_PI + cache_iter;


      /* Capture the return value so that the failure report shows the code
         of this call rather than a stale one. */
      if ((retval = PAPI_get_event_info(eventlist[evid], &evinfo)) != PAPI_OK)
	test_fail(__FILE__, __LINE__, "PAPI_get_event_info", retval);


      if ((retval = PAPI_start(EventSet)) != PAPI_OK)
	test_fail(__FILE__, __LINE__, "PAPI_start", retval);
#else
      unsigned long long int cycles_start, cycles_stop;
#endif

      // Start cycle count.
      cycles_start = read_tsc();

      // Kernel code.

      /* Original iterators. */
      int i,j;
      /* Parameters. */
      int M=PARVAL1, N=PARVAL2;

      for (i = 0; i <= M; i++)
	{
	  S1(i);
	  for (j = 0; j <= N; j++) {
	    S2(i,j);
	  }
	}


      // Stop cycle count.
      cycles_stop = read_tsc();

#ifdef LETSEE_PAPI
      if ((retval = PAPI_read(EventSet, &values[0])) != PAPI_OK)
	test_fail(__FILE__, __LINE__, "PAPI_read", retval);

      if ((retval = PAPI_stop(EventSet,NULL)) != PAPI_OK)
	test_fail(__FILE__, __LINE__, "PAPI_stop", retval);

      cycles_avg += cycles_stop - cycles_start;
      ++measured;
      all_values[evid] = values[0];

      if ((retval = PAPI_remove_event(EventSet, eventlist[evid])) != PAPI_OK)
	test_fail(__FILE__, __LINE__, "PAPI_remove_event", retval);
    }

  if ((retval = PAPI_destroy_eventset(&EventSet)) != PAPI_OK)
    test_fail(__FILE__, __LINE__, "PAPI_destroy_eventset", retval);

  // Output measure results.
  /* Average over the runs that were really measured; skipped events added
     nothing to cycles_avg and must not be counted in the divisor. */
  printf("Cycles: %llu\n", measured > 0 ? cycles_avg / measured : 0ULL);
  printf ("Values: ");
  for (eviditer = 0; eviditer < evid; ++eviditer)
    printf ("%llu ", (unsigned long long) all_values[eviditer]);
  printf ("\n");
#else
  printf("Cycles: %llu\n", cycles_stop - cycles_start);
#endif



  /* Dump both arrays, one per line, to "<argv[0]>.output". */
  int status = 0;
  char buf[512];
  int name_len = snprintf(buf, sizeof(buf), "%s.output", argv[0]);

  if (name_len < 0 || (size_t) name_len >= sizeof(buf)) {
    /* The file name did not fit: refuse to write to a truncated path. */
    fprintf(stderr, "Output file name is too long\n");
    status = 1;
  } else {
    FILE* o_file = fopen(buf, "w");

    if (o_file == NULL) {
      perror(buf);
      status = 1;
    } else {
      for (iarray = 0; iarray <= PARVAL1; ++iarray) {
        fprintf(o_file, FLOAT_MODIFIER, a[iarray]);
      }
      fputc('\n', o_file);
      for (iarray = 0; iarray <= PARVAL2; ++iarray) {
        fprintf(o_file, FLOAT_MODIFIER, b[iarray]);
      }
      fputc('\n', o_file);
      /* fclose flushes the stream, so a write error can surface here. */
      if (fclose(o_file) != 0) {
        perror(buf);
        status = 1;
      }
    }
  }
#ifdef test_malloc
  free(a);
  free(b);
#endif



  return status;
}

