/*
 * translator.c: this file is part of the Vectorizer project.
 *
 * Vectorizer, a vectorization module.
 *
 * Copyright (C) 2010 Louis-Noel Pouchet
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 3
 * of the License, or (at your option) any later version.
 *
 * The complete GNU Lesser General Public Licence Notice can be found
 * as the `COPYING.LESSER' file in the root directory.
 *
 * Author:
 * Louis-Noel Pouchet <pouchet@cse.ohio-state.edu>
 */
#if HAVE_CONFIG_H
# include <vectorizer/config.h>
#endif

#include <vectorizer/common.h>
#include <vectorizer/translator.h>
#include <candl/options.h>
#include <candl/violation.h>
#include <candl/candl.h>
#include <vectorizer/list.h>
#include <clasttools/clastext.h>


/**
 * Code to detect and translate a parallel for.
 *
 */
static
struct clast_parfor*
translate_to_parfor (struct clast_for* f, struct clast_stmt** last,
		     int loop_id)
{
  printf ("[Vectorizer] Detected loop id: %d (iterator:%s) as parallel\n",
	  loop_id, f->iterator);
  f->stmt.op = &stmt_parfor;

  return (struct clast_parfor*) f;
}


/**
 * Recursive worker for traverse_clast_user_stmt.
 *
 * Walks the statement chain starting at 's', descending into the body
 * of for and block nodes and into the 'then' branch of guard nodes.
 * Each user statement met receives the current value of '*next_id',
 * which is then incremented.
 *
 * The counter is shared by pointer across the whole recursion so that
 * every user statement gets a distinct id, including the statements
 * that follow a nested loop, guard or block.
 *
 * @param s        First statement of the chain to walk (may be NULL).
 * @param list     In/out: list receiving one cell per user statement,
 *                 the id being stored in the cell's data pointer.
 * @param next_id  In/out: id to give to the next user statement.
 */
static
void
collect_clast_user_stmt_ids (struct clast_stmt* s, List** list,
			     long int* next_id)
{
  for ( ; s; s = s->next)
    {
      if (CLAST_STMT_IS_A(s, stmt_for))
	collect_clast_user_stmt_ids (((struct clast_for*)s)->body,
				     list, next_id);
      else if (CLAST_STMT_IS_A(s, stmt_guard))
	collect_clast_user_stmt_ids (((struct clast_guard*)s)->then,
				     list, next_id);
      else if (CLAST_STMT_IS_A(s, stmt_block))
	collect_clast_user_stmt_ids (((struct clast_block*)s)->body,
				     list, next_id);
      else if (CLAST_STMT_IS_A(s, stmt_user))
	{
	  // The id is carried directly in the cell's data pointer.
	  List* cell = list_new ((void*) ((*next_id)++));
	  if (*list == NULL)
	    *list = cell;
	  else
	    *list = list_cons (*list, cell);
	}
    }
}

/**
 * Collect the ids of all user statements reachable from 's'.
 *
 * User statements are numbered consecutively, in traversal order,
 * starting from 'id'. One list cell is produced per user statement.
 *
 * @param s     First statement of the chain to walk (may be NULL).
 * @param list  In/out: list the new cells are added to; '*list' may be
 *              NULL on entry.
 * @param id    Id given to the first user statement encountered.
 */
static
void
traverse_clast_user_stmt (struct clast_stmt* s, List** list, long int id)
{
  collect_clast_user_stmt_ids (s, list, &id);
}

/**
 * Test whether a loop is parallel.
 *
 * The schedule coefficient at [2 * level + 1][level + 1] of every
 * statement collected under the loop is temporarily set to -1, the
 * dependences are checked with candl_violation, and the coefficients
 * are put back afterwards.
 *
 * @param s            The loop to test.
 * @param prog         Candl program whose 'transformation' matrices are
 *                     modified during the test and restored before
 *                     returning.
 * @param deps         Dependences handed to candl_violation.
 * @param level        Depth of the loop, used to locate the schedule
 *                     coefficient.
 * @param stmt_offset  Points to the id given to the first user
 *                     statement found in the loop body; only read here.
 * @return             Non-zero if candl_violation reported no violation
 *                     (returned NULL), zero otherwise.
 */
static
int
check_par_for (struct clast_for* s,
	       CandlProgram* prog,
	       CandlDependence* deps,
	       int level,
	       int* stmt_offset)
{
  // 1- Collect the user statements surrounded by this loop.
  List* list = NULL;
  traverse_clast_user_stmt (s->body, &list, *stmt_offset);
  int size = list_length (list);
  long int stmtid[size + 1];
  stmtid[size] = -1;
  List* tmp;
  int i;
  // Copy the ids out of the list, releasing each cell on the way.
  for (i = 0; i < size; ++i)
    {
      stmtid[i] = (long int) (list->data);
      tmp = list_tail (list);
      free (list);
      list = tmp;
    }

  // 2- Update the program such that schedules for the associated
  // statements are set to '-1' for this loop at this level.
  // If reversal is legal, then the loop is parallel (no loop-carried dep).
  // One spare slot keeps the array length strictly positive when the
  // loop surrounds no user statement (a zero-length VLA is undefined).
  Entier backup[size + 1];
  for (i = 0; i < size; ++i)
    CANDL_init(backup[i]);
  for (i = 0; i < size; ++i)
    {
      CANDL_assign(backup[i],
		   prog->transformation[stmtid[i]]->p
		   [2 * level + 1][level + 1]);
      CANDL_set_si(prog->transformation[stmtid[i]]->p
		   [2 * level + 1][level + 1],
		   -1);
    }

  // 3- Check for the existence of any violated dependence.
  CandlOptions* coptions = candl_options_malloc ();
  CandlViolation* viols = candl_violation (prog, deps, coptions);
  candl_options_free (coptions);
  int is_par = viols == NULL;
  candl_violation_free (viols);

  // 4- Restore the original program scattering. Undo in reverse order:
  // should the same statement id appear twice, the value saved first
  // (the genuine original) is then the one written last.
  for (i = size - 1; i >= 0; --i)
    CANDL_assign(prog->transformation[stmtid[i]]->p
		 [2 * level + 1][level + 1],
		 backup[i]);
  for (i = 0; i < size; ++i)
    CANDL_clear(backup[i]);

  return is_par;
}

/**
 * Walk a clast and retag every loop found parallel as a parfor.
 *
 * Each for node increments '*loop_id' and is tested with check_par_for;
 * when the test succeeds the node is converted by translate_to_parfor.
 * The loop body is visited afterwards in both cases, one level deeper.
 * Guard ('then' branch) and block bodies are visited at the same level.
 * Each user statement increments '*stmt_offset'.
 *
 * @param s            First statement of the chain to walk (may be NULL).
 * @param last         Address of the pointer leading to 's'; forwarded
 *                     to translate_to_parfor.
 * @param prog         Candl program forwarded to check_par_for.
 * @param deps         Dependences forwarded to check_par_for.
 * @param level        Loop depth of the chain being walked.
 * @param loop_id      In/out: running count of for nodes visited.
 * @param stmt_offset  In/out: running count of user statements visited.
 */
static
void
traverse_clast_parfor (struct clast_stmt* s,
		       struct clast_stmt** last,
		       CandlProgram* prog,
		       CandlDependence* deps,
		       int level,
		       int* loop_id,
		       int* stmt_offset)
{
  while (s != NULL)
    {
      if (CLAST_STMT_IS_A(s, stmt_for))
	{
	  struct clast_for* loop = (struct clast_for*) s;
	  *loop_id += 1;
	  if (! check_par_for (loop, prog, deps, level, stmt_offset))
	    // Sequential loop: leave the node alone, visit its body.
	    traverse_clast_parfor (loop->body, &(loop->body), prog, deps,
				   level + 1, loop_id, stmt_offset);
	  else
	    {
	      // Parallel loop: retag the node, then visit its body.
	      struct clast_parfor* par =
		translate_to_parfor (loop, last, *loop_id);
	      traverse_clast_parfor (par->body, &(par->body), prog, deps,
				     level + 1, loop_id, stmt_offset);
	    }
	}
      else if (CLAST_STMT_IS_A(s, stmt_guard))
	{
	  struct clast_guard* guard = (struct clast_guard *) s;
	  traverse_clast_parfor (guard->then, &(guard->then),
				 prog, deps, level, loop_id, stmt_offset);
	}
      else if (CLAST_STMT_IS_A(s, stmt_block))
	{
	  struct clast_block* block = (struct clast_block *) s;
	  traverse_clast_parfor (block->body, &(block->body),
				 prog, deps, level, loop_id, stmt_offset);
	}
      else if (CLAST_STMT_IS_A(s, stmt_user))
	++(*stmt_offset);

      // Step to the next sibling, remembering the link that leads to it.
      last = &(s->next);
      s = s->next;
    }
}


/**
 * This function detects all parallel loops, and changes each
 * associated clast_for node into a clast_parfor node.
 *
 * A CandlProgram is built from the scop, a copy of every statement
 * schedule is installed as its 'transformation' array, the clast is
 * traversed, and the program is released with candl_program_free.
 *
 * @param root  First statement of the clast to process.
 * @param scop  Scop the clast was generated from; its statement list
 *              is read to copy the schedules.
 * @param deps  Dependences used for the parallelism test.
 */
void
vectorizer_translator_parallel (struct clast_stmt* root,
				scoplib_scop_p scop,
				CandlDependence* deps)
{
  // Candl view of the scop.
  CandlProgram* cprogram = candl_program_convert_scop (scop, NULL);

  // Give the candl program one schedule copy per statement, taken in
  // the order of the scop statement list.
  CandlMatrix** scheds = XMALLOC(CandlMatrix*, cprogram->nb_statements);
  scoplib_statement_p stm = scop->statement;
  int idx = 0;
  while (idx < cprogram->nb_statements)
    {
      scheds[idx] = (CandlMatrix*) scoplib_matrix_copy (stm->schedule);
      stm = stm->next;
      ++idx;
    }
  cprogram->transformation = scheds;

  // Visit every for node; those found parallel become parfor nodes.
  int loop_id = 0;
  int stmt_offset = 0;
  traverse_clast_parfor (root, NULL, cprogram, deps, 0, &loop_id, &stmt_offset);

  // Release the candl program.
  candl_program_free (cprogram);
}


static
int
is_innermost (struct clast_stmt* s)
{
  for ( ; s; s = s->next)
    if (CLAST_STMT_IS_A(s, stmt_for) ||
	CLAST_STMT_IS_A(s, stmt_parfor) ||
	CLAST_STMT_IS_A(s, stmt_vectorfor))
      return 0;
  return 1;
}

/**
 * Retag inner-most parfor nodes as vectorfor nodes.
 *
 * A parfor node whose body is reported loop-free by is_innermost has
 * its statement operation switched to stmt_vectorfor and is not
 * visited any further. Every other for, parfor, guard ('then' branch)
 * or block node is descended into.
 *
 * @param s  First statement of the chain to walk (may be NULL).
 */
static
void
traverse_clast_innermost_for (struct clast_stmt* s)
{
  while (s)
    {
      // Sub-chain to descend into for this node, if any.
      struct clast_stmt* inner = NULL;

      if (CLAST_STMT_IS_A(s, stmt_for))
	inner = ((struct clast_for*)s)->body;
      else if (CLAST_STMT_IS_A(s, stmt_parfor))
	{
	  struct clast_parfor* par = (struct clast_parfor*) s;
	  if (! is_innermost (par->body))
	    inner = par->body;
	  else
	    // Inner-most, parallel for loop. Mark it as vectorizable.
	    par->stmt.op = &stmt_vectorfor;
	}
      else if (CLAST_STMT_IS_A(s, stmt_guard))
	inner = ((struct clast_guard*)s)->then;
      else if (CLAST_STMT_IS_A(s, stmt_block))
	inner = ((struct clast_block*)s)->body;

      // Walking an empty chain does nothing, so it is simply skipped.
      if (inner)
	traverse_clast_innermost_for (inner);

      s = s->next;
    }
}


/**
 * This function translates all inner-most parallel loops from
 * clast_parfor nodes to clast_vectorfor nodes.
 *
 * @param root  First statement of the clast to process.
 * @param scop  Currently unused.
 * @param deps  Currently unused.
 */
void
vectorizer_translator_vectorfor (struct clast_stmt* root,
				scoplib_scop_p scop,
				CandlDependence* deps)
{
  // For the moment, simply mark all inner parallel loops as vector
  // loops: neither the scop nor the dependences are consulted.
  (void) scop;
  (void) deps;
  traverse_clast_innermost_for (root);
}
