/*
 * fsspace.c: this file is part of the LetSee project.
 *
 * LetSee, the LEgal Transformation SpacE Explorator.
 *
 * Copyright (C) 2008 Louis-Noel Pouchet
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * The complete GNU General Public Licence Notice can be found as the
 * `COPYING' file in the root directory.
 *
 * Author:
 * Louis-Noel Pouchet <Louis-Noel.Pouchet@inria.fr>
 */
#if HAVE_CONFIG_H
# include <letsee/config.h>
#endif

#include <letsee/fsspace.h>


// Debug helper: prints the stringified macro argument, then a newline.
// NOTE(review): the stringified text is passed as the printf format
// itself, so an argument containing '%' would be read as a conversion
// specification. The expansion is a bare brace block, not do/while(0).
#undef echo
#define echo(S) { printf(#S); printf("\n"); }



/*
 * Compute the set of vertices connected to a reference vertex F through
 * the edges of a sub-range of GRAPH (edges are treated as undirected).
 *
 *  - start == 0: only edges whose two endpoints have id <= STOP are
 *    considered, and F is the vertex of id STOP.
 *  - start != 0: only edges whose two endpoints have id >= START are
 *    considered, and F is the vertex of id START (STOP is not used).
 *
 * Vertex ids are used directly as indices in the adjacency matrix, and
 * the vertex list is walked in parallel with those indices.
 *
 * Returns a newly allocated, NULL-terminated array of vertex pointers
 * (F itself is included when its id is a valid index). The caller owns
 * the array.
 */
static
s_vertex_t**
compute_posnegclos (s_graph_t* graph, int start, int stop)
{
  int i, j, k;
  int count = 0;
  int sz = graph->vertex_count;
  s_vertex_t** res = XMALLOC(s_vertex_t*, sz + 1);
  s_vertex_t* v;
  s_edge_t* e;
  // It's a null-terminated array.
  for (i = 0; i <= sz; ++i)
    res[i] = NULL;

  // Create the (partial) adjacency matrix.
  int** matrix = XMALLOC(int*, sz);
  for (i = 0; i < sz; ++i)
    {
      matrix[i] = XMALLOC(int, sz);
      for (j = 0; j < sz; ++j)
        matrix[i][j] = 0;
    }

  // Fill the adjacency matrix, restricted to the requested id range.
  for (e = graph->first_edge; e; e = e->next)
    {
      int keep;
      if (start == 0)
        keep = (e->src->id <= stop && e->dst->id <= stop);
      else
        keep = (e->src->id >= start && e->dst->id >= start);
      if (keep)
        {
          matrix[e->src->id][e->dst->id] = 1;
          matrix[e->dst->id][e->src->id] = 1;
        }
    }

  // Compute the transitive closure with Warshall algorithm. The
  // intermediate vertex (k) must be the outermost loop, otherwise a
  // single pass can miss paths going through lower-numbered vertices.
  for (k = 0; k < sz; k++)
    for (i = 0; i < sz; i++)
      if (matrix[i][k] == 1)
        for (j = 0; j < sz; j++)
          if (matrix[k][j] == 1)
            matrix[i][j] = 1;

  // Reset start to the id of F.
  if (start == 0)
    start = stop;

  // Guard against an id of F that is not a valid matrix index.
  if (start >= 0 && start < sz)
    {
      // F is in the transitive closure.
      matrix[start][start] = 1;
      // Compute F+/-.
      for (i = 0, v = graph->root; i < sz && v != NULL; ++i, v = v->next)
        if (matrix[start][i] != 0)
          res[count++] = v;
    }

  // Be clean.
  for (i = 0; i < sz; ++i)
    XFREE(matrix[i]);
  XFREE(matrix);

  return res;
}


// Closure of R computed on the id range [R->id, last vertex id] of DEPS.
// Thin wrapper around compute_posnegclos(); the caller owns the returned
// NULL-terminated array.
static
s_vertex_t**
compute_posclos (s_graph_t* deps, s_vertex_t* r)
{
  const int first_id = r->id;
  const int last_id = deps->vertex_count - 1;

  return compute_posnegclos (deps, first_id, last_id);
}


// Closure of R computed on the id range [0, R->id] of DEPS.
// Thin wrapper around compute_posnegclos(); the caller owns the returned
// NULL-terminated array.
static
s_vertex_t**
compute_negclos (s_graph_t* deps, s_vertex_t* r)
{
  const int upper_id = r->id;

  return compute_posnegclos (deps, 0, upper_id);
}


/*
 * Decide whether statements I and J of PROGRAM require skewing, from the
 * constraint set T built for that pair.
 *
 * The system of T is extended with two inequalities (one over the
 * iterator columns of statement I, one over those of statement J, both
 * with constant -1), its integer lexicographic minimum is computed with
 * PIP, and the non-zero entries of that point are counted.
 *
 * Returns non-zero when more than one counted coefficient is non-zero,
 * 0 otherwise (including when T is NULL or PIP returns no solution).
 */
static
int
must_skew (s_fm_solution_t* t, int i, int j,
           CandlProgram* program)
{
  if (t == NULL)
    // Should never occur.
    return 0;

  int k;
  s_fm_vector_t* v;
  CandlStatement* st1 = program->statement[i];
  CandlStatement* st2 = program->statement[j];
  s_fm_system_t* s = fm_solution_to_system (t);

  // Add \sum i_R > 0.
  v = fm_vector_alloc (s->nb_cols);
  for (k = 0; k < st1->depth; ++k)
    fm_vector_assign_int_idx (v, 1, k + 1);
  fm_vector_assign_int_idx (v, -1, v->size - 1);
  fm_vector_set_ineq (v);
  fm_system_add_line (s, v);

  // Add \sum i_S > 0.
  v = fm_vector_alloc (s->nb_cols);
  for (; k < st1->depth + st2->depth; ++k)
    fm_vector_assign_int_idx (v, 1, k + 1);
  fm_vector_assign_int_idx (v, -1, v->size - 1);
  fm_vector_set_ineq (v);
  fm_system_add_line (s, v);

  // Check for the lexmin.
  PipQuast* quast = fm_piptools_pip (s, NULL, FM_PIPTOOLS_INT);
  fm_system_free (s);
  if (quast == NULL)
    // Should never occur.
    return 0;
  if (quast->list == NULL)
    {
      // Should never occur. Release the quast so it does not leak.
      pip_quast_free (quast);
      return 0;
    }

  // Copy the lexmin point into a vector (index 0 is left untouched).
  PipList* cell;
  v = fm_vector_alloc (t->size + 1);
  for (k = 1, cell = quast->list; cell; cell = cell->next, ++k)
    fm_vector_assign_int_idx (v, cell->vector->the_vector[0], k);
  pip_quast_free (quast);

  // Count the non-zero entries in the first segment; only when exactly
  // one is found is the count restarted on the second segment.
  // NOTE(review): the first scan covers indices 1 .. depth(I) - 1 and the
  // second starts at depth(I) + 1 -- confirm these bounds are intended.
  int l;
  int sz = 0;
  for (l = 1; l < st1->depth; ++l)
    if (v->vector[l].num != 0)
      ++sz;
  if (sz == 1)
    for (sz = 0, ++l; l < st1->depth + st2->depth; ++l)
      if (v->vector[l].num != 0)
        ++sz;
  fm_vector_free (v);

  return sz > 1;
}


/*
 * Decide whether statements II and JJ of PROGRAM must be distributed.
 *
 * A copy of T is extended with two inequalities (sum of the iterator
 * columns of each statement, constant -1) and checked for an integer
 * point. Returns non-zero when the check fails.
 */
static
int
must_distribute (s_fm_solution_t* t, int ii, int jj,
                 CandlProgram* program)
{
  CandlStatement* st1 = program->statement[ii];
  CandlStatement* st2 = program->statement[jj];
  s_fm_vector_t* row_r = fm_vector_alloc (t->size + 2);
  s_fm_vector_t* row_s = fm_vector_alloc (t->size + 2);
  int col = 0;

  // Constraint over the iterator columns of the first statement.
  while (col < st1->depth)
    {
      fm_vector_assign_int_idx (row_r, 1, col + 1);
      ++col;
    }
  fm_vector_assign_int_idx (row_r, -1, row_r->size - 1);
  fm_vector_set_ineq (row_r);

  // Constraint over the iterator columns of the second statement.
  while (col < st1->depth + st2->depth)
    {
      fm_vector_assign_int_idx (row_s, 1, col + 1);
      ++col;
    }
  fm_vector_assign_int_idx (row_s, -1, row_s->size - 1);
  fm_vector_set_ineq (row_s);

  // Work on a private copy of the solution.
  s_fm_solution_t* extended = fm_solution_dup (t);
  fm_solution_add_line_at (extended, row_r, st1->depth);
  fm_solution_add_line_at (extended, row_s, col);

  int has_point = fm_piptools_check_sol (extended, FM_PIPTOOLS_INT);
  fm_solution_free (extended);

  return !has_point;
}

/*
 * Decide whether the statement pair described by T must be fused.
 *
 * Builds a point whose only assigned entry is a 1 in the last position
 * and tests whether T contains it. Returns non-zero when it does not.
 * I, J and PROGRAM are not used by the test itself.
 */
static
int
must_fuse (s_fm_solution_t* t, int i, int j,
           CandlProgram* program)
{
  s_fm_vector_t* point = fm_vector_alloc (t->size + 1);
  int included;

  fm_vector_assign_int_idx (point, 1, point->size - 1);
  included = fm_solution_point_included (t, point);
  fm_vector_free (point);

  if (included)
    return 0;
  return 1;
}

/*
 * Test whether pairing iterator II of statement I with iterator JJ of
 * statement J is legal with respect to the constraint set T.
 *
 * The columns of the two selected iterators are swapped to the last
 * position of their statement's iterator segment, the same two
 * inequalities as in must_skew() are added, and the integer lexmin is
 * computed with PIP. The pairing is accepted when both swapped columns
 * are non-zero in the lexmin and every other scanned iterator column is
 * zero.
 *
 * Returns 1 when legal, 0 otherwise (including when T is NULL or PIP
 * returns no solution).
 */
static
int
is_legal_perm (s_fm_solution_t* t, int i, int j,
               int ii, int jj,
               CandlProgram* program)
{
  if (t == NULL)
    return 0;

  int k;
  s_fm_vector_t* v;
  CandlStatement* st1 = program->statement[i];
  CandlStatement* st2 = program->statement[j];
  s_fm_system_t* s = fm_solution_to_system (t);
  // Move the two selected iterator columns to the end of their own
  // iterator variable segment.
  fm_system_swap_column (s, ii + 1, st1->depth);
  fm_system_swap_column (s, st1->depth + jj + 1, st1->depth + st2->depth);
  // Add \sum i_R > 0.
  v = fm_vector_alloc (s->nb_cols);
  for (k = 0; k < st1->depth; ++k)
    fm_vector_assign_int_idx (v, 1, k + 1);
  fm_vector_assign_int_idx (v, -1, v->size - 1);
  fm_vector_set_ineq (v);
  fm_system_add_line (s, v);

  // Add \sum i_S > 0.
  v = fm_vector_alloc (s->nb_cols);
  for (; k < st1->depth + st2->depth; ++k)
    fm_vector_assign_int_idx (v, 1, k + 1);
  fm_vector_assign_int_idx (v, -1, v->size - 1);
  fm_vector_set_ineq (v);
  fm_system_add_line (s, v);

  // Check for the lexmin.
  PipQuast* quast = fm_piptools_pip (s, NULL, FM_PIPTOOLS_INT);
  fm_system_free (s);
  if (quast == NULL)
    return 0;
  if (quast->list == NULL)
    {
      // No solution: release the quast so it does not leak.
      pip_quast_free (quast);
      return 0;
    }

  // Copy the lexmin point into a vector (index 0 is left untouched).
  PipList* cell;
  v = fm_vector_alloc (t->size + 1);
  for (k = 1, cell = quast->list; cell; cell = cell->next, ++k)
    fm_vector_assign_int_idx (v, cell->vector->the_vector[0], k);
  pip_quast_free (quast);

  int l;
  int ret = 0;
  if (v->vector[st1->depth].num != 0 &&
      v->vector[st1->depth + st2->depth].num != 0)
    {
      // All other iterator coefficients of the first statement are 0?
      for (l = 1; l < st1->depth && v->vector[l].num == 0; ++l)
        ;
      if (l == st1->depth)
        {
          // Same check on the second statement's segment.
          for (++l; l < st1->depth + st2->depth && v->vector[l].num == 0; ++l)
            ;
          if (l == st1->depth + st2->depth)
            ret = 1;
        }
    }
  fm_vector_free (v);
  return ret;
}

/*
 * Build the legal-space constraints restricted to statements I and J.
 *
 * A temporary two-statement program and a temporary graph holding the
 * dependences of DEPGRAPH that involve only those two statements are
 * handed to ls_farkas_build_th_constraints(). While that call runs, the
 * two statements are relabelled 0 and 1, and OPTIONS has verbose forced
 * to 0 and maxscale_solver forced to 1; all four values are restored
 * before returning.
 *
 * Returns the solution produced by ls_farkas_build_th_constraints().
 */
static
s_fm_solution_t*
build_th_constraints (s_graph_t* depgraph,
                      CandlProgram* program,
                      s_ls_options_t* options,
                      int i,
                      int j)
{
  // Temporary program made of the two statements only.
  CandlStatement* pair[2];
  CandlProgram* sub = candl_program_malloc (2);
  pair[0] = program->statement[i];
  pair[1] = program->statement[j];
  sub->statement = pair;
  sub->nb_statements = 2;
  sub->context = program->context;

  // Remember the original labels.
  int label_first = pair[0]->label;
  int label_second = pair[1]->label;

  // Temporary dependence graph: keep dependences whose source and
  // target are both one of the two statements.
  s_graph_t* local = ls_graph_alloc ();
  s_vertex_t* node;
  int next_id = 0;
  for (node = depgraph->root; node; node = node->next)
    {
      CandlDependence* d = node->data;
      int src_in = (d->source->label == label_first
                    || d->source->label == label_second);
      if (src_in
          && (d->target->label == label_first
              || d->target->label == label_second))
        ls_graph_create_vertex (local, d, next_id++);
    }

  // Temporary labels expected for a 2-statement program.
  pair[0]->label = 0;
  pair[1]->label = 1;

  // Compute the legal space with quiet, maxscale settings.
  int saved_verbose = options->verbose;
  int saved_maxscale = options->maxscale_solver;
  options->verbose = 0;
  options->maxscale_solver = 1;
  s_fm_solution_t* result = ls_farkas_build_th_constraints
    (local, sub, options);
  options->verbose = saved_verbose;
  options->maxscale_solver = saved_maxscale;

  // Put the original labels back.
  pair[0]->label = label_first;
  pair[1]->label = label_second;

  // Release the temporaries.
  ls_graph_free (local);
  XFREE(sub);

  return result;
}

/*
 * Debug helper: dump the must-fuse relation stored in SCREEN as a dot
 * graph in /tmp/fuse.dot. An edge Si -> Sj (0-based, i < j) is printed
 * for every pair whose 't' entry in SCREEN equals 1.
 * Nothing is written when the file cannot be opened.
 */
static
void
dotty_print_fuse (int* screen, CandlProgram* program)
{
  int i, j;

  FILE* f = fopen("/tmp/fuse.dot", "w");
  // fopen returns NULL on failure; do not write through a null stream.
  if (f == NULL)
    return;
  fprintf (f, "digraph G {\n");
  for (i = 0; i < program->nb_statements - 1; ++i)
    for (j = i + 1; j < program->nb_statements; ++j)
      {
        if (screen[ls_oset_idx_t (program->nb_statements, i + 1, j + 1) - 1]
            == 1)
          fprintf (f, "S%d -> S%d\n", i, j);
      }
  fprintf (f, "}\n");
  fclose (f);
}

/*
 * Propagate a must-fuse decision for statements II and JJ in SCREEN.
 *
 * (1) Every vertex that is both in the closure computed from II upward
 *     and in the closure computed up to JJ, with an id strictly between
 *     II and JJ, is marked as fused with II and with JJ (p entry set to
 *     0, t entry set to 1).
 * (2-4) The must-fuse relation stored in SCREEN is then closed
 *     transitively, and the result is written back into SCREEN.
 */
static
void
update_transclos_fuse (int* screen, CandlProgram* program,
                       int ii, int jj, s_graph_t* deps)
{
  int i, j, k;
  int sz = program->nb_statements;
  int matrix[sz][sz];
  s_vertex_t** negclos;
  s_vertex_t** posclos;
  s_vertex_t* clos[program->nb_statements];
  int count = 0;

  // (1) Update the fusion information such that, if R - S must be
  // fused, any statement between R and S on which S depends and
  // depending on R is fused in the same component.
  negclos = compute_posnegclos (deps, 0, program->statement[jj]->label);
  posclos = compute_posnegclos (deps, program->statement[ii]->label,
                                program->nb_statements - 1);
  for (i = 0; posclos[i] != NULL; ++i)
    {
      for (j = 0; negclos[j] != NULL; ++j)
        {
          if (posclos[i] == negclos[j] &&
              posclos[i]->id > ii && posclos[i]->id < jj &&
              negclos[j]->id > ii && negclos[j]->id < jj)
            break;
        }
      if (negclos[j])
        clos[count++] = posclos[i];
    }
  clos[count] = NULL;
  // The closure arrays are owned by this function; only the vertex
  // pointers copied into clos[] are still needed.
  XFREE(negclos);
  XFREE(posclos);

  for (i = 0; clos[i] != NULL; ++i)
    {
      screen[ls_oset_idx_p (program->nb_statements,
                            ii + 1,
                            clos[i]->id + 1) - 1] = 0;
      screen[ls_oset_idx_t (program->nb_statements,
                            ii + 1,
                            clos[i]->id + 1) - 1] = 1;
      screen[ls_oset_idx_p (program->nb_statements,
                            clos[i]->id + 1,
                            jj + 1) - 1] = 0;
      screen[ls_oset_idx_t (program->nb_statements,
                            clos[i]->id + 1,
                            jj + 1) - 1] = 1;
    }

  // (2) Compute the adjacency matrix of the must-fuse graph. The whole
  // matrix is cleared first so the diagonal is never read uninitialized.
  for (i = 0; i < sz; ++i)
    for (j = 0; j < sz; ++j)
      matrix[i][j] = 0;
  for (i = 0; i < sz - 1; ++i)
    for (j = i + 1; j < sz; ++j)
      if (screen[ls_oset_idx_t (sz, i + 1, j + 1) - 1] == 1)
        {
          matrix[i][j] = 1;
          matrix[j][i] = 1;
        }

  // (3) Compute the transitive closure of the must-fuse graph. The
  // intermediate vertex (k) is the outermost loop, as Warshall requires.
  for (k = 0; k < sz; k++)
    for (i = 0; i < sz; i++)
      if (matrix[i][k] == 1)
        for (j = 0; j < sz; j++)
          if (matrix[k][j] == 1)
            matrix[i][j] = 1;

  // (4) Update the screen with the transitively covered must-fuse info.
  for (i = 0; i < sz; ++i)
    for (j = i + 1; j < sz; ++j)
      if (matrix[i][j] == 1)
        {
          screen[ls_oset_idx_p (sz, i + 1, j + 1) - 1] = 0;
          screen[ls_oset_idx_t (sz, i + 1, j + 1) - 1] = 1;
        }
}

/*
 * Build the fusion graph F of PROGRAM and record pairwise fusion /
 * distribution decisions in SCREEN.
 *
 * For every statement pair (i, j), i < j, linked by at least one
 * dependence of DEPENDENCES:
 *  - vertices for i and j are created in F (id = statement label);
 *  - unset 't' entries of SCREEN between the closure of i and the
 *    closure of j are flagged LS_OSET_ELIM;
 *  - the pairwise legal space is built, then tested for mandatory
 *    distribution (SCREEN updated, i-j edges removed from F), mandatory
 *    fusion (SCREEN updated and transitively closed), mandatory skewing
 *    (an edge labelled {-1,-1} is added), and otherwise one edge
 *    labelled {ii, jj} is added per legal iterator pairing.
 *
 * Returns F; edge payloads are heap-allocated int[2] arrays.
 */
s_graph_t*
ls_fsspace_build_fusiongraph (s_graph_t* deps,
                              CandlProgram* program,
                              CandlDependence* dependences,
                              s_ls_options_t* options,
                              int* screen)
{
  int i, j;
  int ii, jj;
  s_graph_t* F = ls_graph_alloc();
  s_vertex_t* v;
  s_vertex_t* stmts[program->nb_statements];
  s_fm_solution_t* t;
  CandlDependence* tmp;
  s_vertex_t** negclos;
  s_vertex_t** posclos;
  s_edge_t* e;
  s_edge_t* next_e;

  for (i = 0, v = deps->root; i < program->nb_statements; ++i, v = v->next)
    stmts[i] = v;

  for (i = 0; i < program->nb_statements - 1; ++i)
    for (j = i + 1; j < program->nb_statements; ++j)
      {
        // Create local dependence graph.
        s_graph_t* depgraph = ls_graph_alloc ();
        int uid = 0;
        // (1) Collect dependences i -> j or j -> i
        for (tmp = dependences; tmp; tmp = tmp->next)
          if ((tmp->source->label == i && tmp->target->label == j)
              || (tmp->source->label == j && tmp->target->label == i))
            ls_graph_create_vertex (depgraph, tmp, uid++);

        // (2) There is no dependence, no need to check fusability.
        if (depgraph->vertex_count == 0)
          {
            ls_graph_free (depgraph);
            continue;
          }

        // (3) Complete the local dependence graph with self dependences.
        for (tmp = dependences; tmp; tmp = tmp->next)
          if ((tmp->source->label == i && tmp->target->label == i)
              || (tmp->source->label == j && tmp->target->label == j))
            ls_graph_create_vertex (depgraph, tmp, uid++);

        s_vertex_t* st_i = ls_graph_create_vertex
          (F, stmts[i]->data, ((CandlStatement*)(stmts[i]->data))->label);
        s_vertex_t* st_j = ls_graph_create_vertex
          (F, stmts[j]->data, ((CandlStatement*)(stmts[j]->data))->label);

        // There is a dependence. Variable t_ij can be eliminated (p_ij = 0).
        negclos = compute_negclos (deps, st_i);
        posclos = compute_posclos (deps, st_j);
        for (ii = 0; negclos[ii] != NULL; ++ii)
          for (jj = 0; posclos[jj] != NULL; ++jj)
            {
              if (screen[ls_oset_idx_t (program->nb_statements,
                                        negclos[ii]->id + 1,
                                        posclos[jj]->id + 1) - 1]
                  == LS_OSET_UNSET)
                screen[ls_oset_idx_t (program->nb_statements,
                                      negclos[ii]->id + 1,
                                      posclos[jj]->id + 1) - 1] = LS_OSET_ELIM;
            }
        XFREE(negclos);
        XFREE(posclos);

        // Compute T_RS.
        if (options->verbose)
          {
            fprintf (options->out_file, ".");
            fflush (options->out_file);
          }
        t = build_th_constraints (depgraph, program, options, i, j);

        // Check for distribution.
        if (must_distribute (t, i, j, program))
          {
            // Apply prop. 3.
            negclos = compute_negclos (deps, st_i);
            posclos = compute_posclos (deps, st_j);
            for (ii = 0; negclos[ii] != NULL; ++ii)
              for (jj = 0; posclos[jj] != NULL; ++jj)
                {
                  screen[ls_oset_idx_p (program->nb_statements,
                                        negclos[ii]->id + 1,
                                        posclos[jj]->id + 1) - 1] = 1;
                  screen[ls_oset_idx_t (program->nb_statements,
                                        negclos[ii]->id + 1,
                                        posclos[jj]->id + 1) - 1] = 0;
                }
            XFREE(negclos);
            XFREE(posclos);
            // Fetch the successor before removing the edge: the removed
            // edge must not be used to advance the iteration.
            for (e = F->first_edge; e; e = next_e)
              {
                next_e = e->next;
                if ((e->src->id == i && e->dst->id == j) ||
                    (e->dst->id == i && e->src->id == j))
                  ls_graph_remove_edge (F, e);
              }
            // Release the per-pair temporaries before moving on.
            fm_solution_free (t);
            ls_graph_free (depgraph);
            continue;
          }
        // Check for fusion.
        if (must_fuse (t, i, j, program))
          {
            screen[ls_oset_idx_p (program->nb_statements, i + 1, j + 1)- 1] = 0;
            screen[ls_oset_idx_t (program->nb_statements, i + 1, j + 1)- 1] = 1;
            update_transclos_fuse (screen, program, i, j, deps);
          }
        // Check for skew.
        if (must_skew (t, i, j, program))
          {
            int* val = XMALLOC(int, 2);
            val[0] = -1;
            val[1] = -1;
            ls_graph_create_edge (F, st_i, st_j, val);
            // Release the per-pair temporaries before moving on.
            fm_solution_free (t);
            ls_graph_free (depgraph);
            continue;
          }
        // Check for each (ir,is).
        for (ii = 0; ii < ((CandlStatement*)(stmts[i]->data))->depth; ++ii)
          for (jj = 0; jj < ((CandlStatement*)(stmts[j]->data))->depth; ++jj)
            {
              if (is_legal_perm (t, i, j, ii, jj, program))
                {
                  int* val = XMALLOC(int, 2);
                  val[0] = ii;
                  val[1] = jj;
                  ls_graph_create_edge (F, st_i, st_j, val);
                }
            }

        // Some cleaning.
        fm_solution_free (t);
        ls_graph_free (depgraph);
      }

  if (options->verbose)
    fprintf (options->out_file, "\n");

  return F;
}


// Fusibility test for the sub-path path[b..e]: currently a stub that
// accepts every sub-path (always returns 1, arguments are ignored).
static
int
check_fusible (s_graph_t* deps, s_graph_t* fgraph, s_vertex_t** path,
               int b, int e)
{
  (void) deps;
  (void) fgraph;
  (void) path;
  (void) b;
  (void) e;

  return 1;
}


// Visitor callback: append V to the NULL-padded vertex array stored in
// G->usr, at the first free slot among the first G->vertex_count
// entries. When no free slot is left the vertex is dropped instead of
// being written past that range.
static
void
build_comp (s_graph_t* g, s_vertex_t* v)
{
  int i;
  s_vertex_t** path = g->usr;
  for (i = 0; i < g->vertex_count && path[i] != NULL; ++i)
    ;
  if (i < g->vertex_count)
    path[i] = v;
}


/*
 * Build the expanded fusion graph of PROGRAM from FGRAPH.
 *
 * One vertex is created per (statement, loop depth) couple. Every edge
 * of FGRAPH, whose payload is an int[2] holding the iterator index on
 * each side, becomes an edge between the matching expanded vertices.
 * Skew edges (payload {-1, -1}) are not supported: they are reported on
 * stderr and skipped. The graph is trimmed before being returned.
 */
s_graph_t*
ls_fsspace_expand_fusion_graph (CandlProgram* program, s_graph_t* fgraph)
{
  s_graph_t* g = ls_graph_alloc ();
  s_vertex_t* v;
  int max_depth = 0;
  int i, j;
  int id = 0;

  // Compute the maximal depth.
  for (i = 0; i < program->nb_statements; ++i)
    if (program->statement[i]->depth > max_depth)
      max_depth = program->statement[i]->depth;

  // Allocate an array for the expansion of fgraph, to ease vertex
  // indexing. Dimensions are kept >= 1: a zero-sized VLA is undefined.
  int rows = program->nb_statements > 0 ? program->nb_statements : 1;
  int cols = max_depth > 0 ? max_depth : 1;
  s_vertex_t* vertices[rows][cols];
  for (i = 0; i < rows; ++i)
    for (j = 0; j < cols; ++j)
      vertices[i][j] = NULL;

  // Create the extended fusion graph.
  for (i = 0; i < program->nb_statements; ++i)
    for (j = 0; j < program->statement[i]->depth; ++j)
      {
        v = ls_graph_create_vertex (g, program->statement[i], id++);
        vertices[i][j] = v;
      }

  // Inspect the standard fusion graph to create necessary edges in
  // the extended fusion graph.
  s_edge_t* e;
  for_all_edges(e, fgraph)
    {
      int src_id = e->src->id;
      int dst_id = e->dst->id;
      int* val = (int*) (e->data);
      // A skew edge carries -1 on both sides and cannot be used as an
      // index: report it and do not create an expanded edge for it.
      if (val[0] == -1 && val[1] == -1)
        fprintf (stderr, "ERROR: skew edge encountered, not implemented.\n");
      else
        ls_graph_create_edge
          (g, vertices[src_id][val[0]], vertices[dst_id][val[1]], NULL);
    }

  // Trim the graph.
  ls_graph_trim (g);

  return g;
}

/*
 * Check that vertex N is adjacent to exactly NB_GROUPS distinct
 * statements. Incoming edges contribute the label of their source
 * statement, outgoing edges the label of their destination statement.
 * NB_STATEMENTS sizes the label table. Returns non-zero on a match.
 */
int
check_successor_in_group (s_vertex_t* n, int nb_groups, int nb_statements)
{
  int seen[nb_statements];
  int distinct = 0;
  int pass;
  int idx;

  for (idx = 0; idx < nb_statements; ++idx)
    seen[idx] = 0;

  // Pass 0 walks the incoming edge list, pass 1 the outgoing one.
  for (pass = 0; pass < 2; ++pass)
    {
      s_fm_list_t* head = (pass == 0) ? n->in : n->out;
      s_fm_list_t* cell;
      for (cell = head; cell; cell = cell->next)
        {
          s_edge_t* edge = (s_edge_t*) (cell->data);
          s_vertex_t* other = (head == n->in) ? edge->src : edge->dst;
          CandlStatement* stmt = (CandlStatement*) other->data;
          seen[stmt->label] = 1;
        }
    }

  for (idx = 0; idx < nb_statements; ++idx)
    if (seen[idx] != 0)
      ++distinct;

  return distinct == nb_groups;
}

/*
 * Count the number of distinct statement labels carried by the vertices
 * of G (vertex data is read as a CandlStatement).
 *
 * The label table is sized from the largest label actually present, not
 * from the vertex count: after vertices have been removed, a remaining
 * label can exceed the number of vertices. Returns 0 for an empty graph.
 */
static
int
count_groups (s_graph_t* g)
{
  int count = 0;
  int max_label = -1;
  s_vertex_t* v;
  int i;

  // First pass: find the largest label.
  for_all_nodes(v, g)
    {
      CandlStatement* s = (CandlStatement*) (v->data);
      if (s->label > max_label)
        max_label = s->label;
    }
  if (max_label < 0)
    return 0;

  int groups[max_label + 1];
  for (i = 0; i <= max_label; ++i)
    groups[i] = 0;

  // Second pass: count each label the first time it is met.
  for_all_nodes(v, g)
    {
      CandlStatement* s = (CandlStatement*) (v->data);
      if (s->label >= 0)
        {
          if (groups[s->label] == 0)
            ++count;
          groups[s->label] = 1;
        }
    }

  return count;
}

// Clique test on G. Placeholder: the loop over the nodes has an empty
// body and the function unconditionally returns 1.
static
int
check_has_clique (s_graph_t* g)
{
  s_vertex_t* v;
  for_all_nodes(v, g)
    {
      // Nothing is checked yet.


    }
  return 1;
}

/*
 * Test whether G may contain a clique spanning all its statement groups.
 *
 * Vertices that are not adjacent to the expected number of groups are
 * removed from G (G is modified in place). If a group disappears in the
 * process the answer is 0; otherwise the verdict of check_has_clique()
 * is returned.
 */
int
ls_fsspace_has_clique (s_graph_t* g, CandlProgram* p)
{
  // Number of groups represented before any pruning.
  const int expected = count_groups (g);

  // Drop every node that does not reach the expected number of groups.
  // The successor is fetched first since the node may be removed.
  s_vertex_t* cur = g->root;
  while (cur != NULL)
    {
      s_vertex_t* following = cur->next;
      if (! check_successor_in_group (cur, expected, p->nb_statements))
        ls_graph_remove_vertex (g, cur);
      cur = following;
    }

  // A group vanished: no clique can go through all of them.
  if (count_groups (g) < expected)
    return 0;

  // Check if there is a clique going through all groups.
  return check_has_clique (g);
}


/*
 * Test whether the statements listed in STMT_IDS (array of labels
 * terminated by -1) form a fusable set in graph G.
 *
 * A copy of G restricted to the vertices whose statement label is in
 * STMT_IDS is built, and ls_fsspace_has_clique() is run on it. G itself
 * is not modified; the temporary copy is released before returning.
 */
int
ls_fsspace_is_fusable_set(s_graph_t* g, int* stmt_ids,
                          CandlProgram* p)
{
  // Build a subgraph with all vertices in stmt_ids.
  s_vertex_t* n;
  s_vertex_t* next;
  s_graph_t* subg = ls_graph_dup (g);
  int i;
  for (n = subg->root; n; )
    {
      next = n->next;
      CandlStatement* s = (CandlStatement*) (n->data);
      for (i = 0; stmt_ids[i] != -1 && s->label != stmt_ids[i]; ++i)
        ;
      if (stmt_ids[i] == -1)
        ls_graph_remove_vertex (subg, n);
      n = next;
    }

  // Check if there is a clique in the subgraph, then release the copy.
  int ret = ls_fsspace_has_clique (subg, p);
  ls_graph_free (subg);

  return ret;
}

/*
 * Fusability test of the NULL-terminated vertex set ELEM, based on the
 * expanded fusion graph NEWFGRAPH. The vertex ids are copied into a
 * -1-terminated array and handed to ls_fsspace_is_fusable_set().
 * FGRAPH is not used.
 */
static
int
check_fuse_v2(s_vertex_t** elem, s_graph_t* fgraph,
              s_graph_t* newfgraph, CandlProgram* p)
{
  int nb_elem = 0;
  int pos;

  // Length of the NULL-terminated input.
  while (elem[nb_elem])
    ++nb_elem;

  // Same set, expressed as a -1-terminated array of ids.
  int* ids = XMALLOC(int, nb_elem + 1);
  ids[nb_elem] = -1;
  pos = 0;
  while (elem[pos])
    {
      ids[pos] = elem[pos]->id;
      ++pos;
    }

  int fusable = ls_fsspace_is_fusable_set(newfgraph, ids, p);
  XFREE(ids);

  return fusable;
}


// Allocate one queue cell with no payload and no successor.
static
s_queue_t*
ls_queue_alloc ()
{
  s_queue_t* cell = XMALLOC(s_queue_t, 1);

  cell->next = NULL;
  cell->data = cell->next;

  return cell;
}

/*
 * Pop the payload at the head of queue *Q.
 *
 * Returns NULL on an empty queue. When the head has a successor, the
 * successor's content is moved into the head cell and the successor cell
 * is freed; otherwise the head cell is freed and *Q becomes NULL.
 */
static
void*
ls_queue_get (s_queue_t** q)
{
  s_queue_t* head = *q;
  if (head == NULL)
    return NULL;

  void* payload = head->data;
  s_queue_t* second = head->next;

  if (second == NULL)
    {
      // Last cell: the queue becomes empty.
      XFREE(*q);
      *q = NULL;
      return payload;
    }

  // Shift the second cell into the head, then drop the second cell.
  head->data = second->data;
  head->next = second->next;
  XFREE(second);

  return payload;
}

/*
 * Append DATA at the tail of queue *Q, unless a cell already holding the
 * same pointer is met while walking to the tail. An empty queue gets its
 * first cell allocated.
 */
static
void
ls_queue_add (s_queue_t** q, void* data)
{
  s_queue_t* cur = *q;

  // Empty queue: create the head cell.
  if (cur == NULL)
    {
      cur = ls_queue_alloc ();
      *q = cur;
      cur->data = data;
      return;
    }

  // Walk until the tail or until a cell already holding DATA.
  while (cur->next != NULL && cur->data != data)
    cur = cur->next;

  // Already queued: nothing to do.
  if (cur->data == data)
    return;

  cur->next = ls_queue_alloc ();
  cur = cur->next;
  cur->data = data;
}

// Return 1 when a cell of Q holds the pointer DATA, 0 otherwise.
static
int
ls_queue_contains (s_queue_t* q, void* data)
{
  s_queue_t* cell;

  for (cell = q; cell != NULL; cell = cell->next)
    if (cell->data == data)
      return 1;

  return 0;
}

/*
 * Fusability test of the NULL-terminated vertex set ELEM in FGRAPH.
 *
 * A copy of FGRAPH restricted to ELEM is built. Each of its edges gives
 * one row of a system with a +1 on the source iterator column and a -1
 * on the destination iterator column (edge payload = int[2] of iterator
 * indices). All-zero columns are removed, the system is reduced with
 * fm_compsol_init_sys(), and the set is reported fusable iff
 * (size - nb_reduc) of the result equals 1.
 *
 * The subgraph, the system and the reduced solution are printed on
 * stdout. All temporaries, including the subgraph copy, are released.
 */
static
int
check_fuse(s_vertex_t** elem, s_graph_t* fgraph)
{
  int i, j;

  // Build the subgraph corresponding to this set of nodes.
  s_graph_t* subg = ls_graph_dup (fgraph);
  s_vertex_t* n;
  s_vertex_t* next;
  for (n = subg->root; n; )
    {
      next = n->next;
      for (i = 0; elem[i] && elem[i]->id != n->id; ++i)
        ;
      if (elem[i] == NULL)
        ls_graph_remove_vertex (subg, n);
      n = next;
    }

  // Compute the number of iterators involved.
  int nb_it = 0;
  for (i = 0; elem[i]; ++i)
    {
      CandlStatement* stmt = (CandlStatement*) (elem[i]->data);
      nb_it += stmt->depth;
    }
  // Build the system.
  s_fm_system_t* s = fm_system_alloc (subg->edge_count, nb_it + 2);
  s_edge_t* e;
  int count = 0;
  ls_graph_print (stdout, subg);
  for_all_edges(e, subg)
    {
      // Column offset of a statement = 1 + depths of the ones before it.
      int offset_src = 1;
      for (i = 0; elem[i]->id != e->src->id; ++i)
        offset_src += ((CandlStatement*) (elem[i]->data))->depth;
      int offset_dst = 1;
      for (i = 0; elem[i]->id != e->dst->id; ++i)
        offset_dst += ((CandlStatement*) (elem[i]->data))->depth;
      fm_vector_assign_int_idx(s->lines[count], 1,
                               offset_src + ((int*)(e->data))[0]);
      fm_vector_assign_int_idx(s->lines[count], -1,
                               offset_dst + ((int*)(e->data))[1]);
      ++count;
    }

  // Remove 0-columns.
  for (i = 1; i < s->nb_cols - 1; ++i)
    {
      for (j = 0; j < s->nb_lines; ++j)
        if (s->lines[j]->vector[i].num != 0)
          break;
      if (j == s->nb_lines)
        {
          fm_system_remove_column(s, i);
          // Re-examine the column that slid into position i.
          --i;
        }
    }

  fm_system_print (stdout, s);

  // perform gaussian elimination.
  s_fm_compsol_t* cs = fm_compsol_init_sys (s);

  fm_solution_print (stdout, cs->poly);
  fm_solution_print (stdout, cs->redeq);

  // Inspect the dimensionality.
  int dim = cs->size - cs->nb_reduc;

  // Be clean. The duplicated subgraph is released too.
  fm_system_free (s);
  fm_compsol_free (cs);
  ls_graph_free (subg);

  // The nodes are fusable iff the dimension of the resulting space is 1.
  return dim == 1;
}

/*
 * Add to S the inequality stating that the statements of the
 * NULL-terminated set ELEM cannot all be fused: a -1 coefficient is put
 * on the 't' variable of every consecutive pair of ELEM, and the
 * constant is (number of pairs - 1).
 *
 * The row is added only when ls_oset_needed_cst() reports it as needed
 * with respect to SCREEN; otherwise it is freed.
 */
static
void
add_nonfuse_cst(s_vertex_t** elem, s_fm_system_t* s, int* screen,
                int nb_stmts)
{
  int i;
  s_fm_vector_t* cst = fm_vector_alloc (s->nb_cols);
  fm_vector_set_ineq (cst);
  for (i = 0; elem[i + 1]; ++i)
    {
      fm_vector_assign_int_idx (cst, -1, ls_oset_idx_t
                                (nb_stmts,
                                 elem[i]->id + 1,
                                 elem[i + 1]->id + 1));
    }
  fm_vector_assign_int_idx (cst, i - 1, cst->size - 1);
  if (ls_oset_needed_cst (cst, screen))
    fm_system_add_line (s, cst);
  else
    // Not inserted in the system: nobody else owns it.
    fm_vector_free (cst);
}

/*
 * Add to S non-fusability constraints involving more than two
 * statements.
 *
 * The work queue starts with every pair of vertices of FGRAPH. At each
 * round, every queued set is extended with a vertex of higher id than
 * its last element; extended sets accepted by check_fuse() are queued
 * for the next round, rejected ones produce a constraint through
 * add_nonfuse_cst().
 *
 * NOTE(review): a set is extended in place, so the same array is reused
 * for every extension of a given set, and arrays taken out of the queue
 * are never freed -- left as is.
 */
void
ls_fsspace_prune_oset(s_fm_system_t* s,
                      s_graph_t* fgraph,
                      s_graph_t* newfgraph,
                      CandlProgram* program,
                      int* screen)
{
  int i, j;
  int max = fgraph->vertex_count;
  s_queue_t* q = NULL;
  s_queue_t* nq = NULL;
  s_vertex_t* v;
  s_vertex_t* v1;
  s_vertex_t* v2;
  s_vertex_t** elem;

  // Initialize the queue with all pairs of nodes.
  for (v1 = fgraph->root; v1; v1 = v1->next)
    for (v2 = v1->next; v2; v2 = v2->next)
      {
        elem = XMALLOC(s_vertex_t*, max + 1);
        elem[0] = v1;
        elem[1] = v2;
        // The terminator goes right after the pair (slot 2): the scans
        // below stop at the first NULL entry.
        elem[2] = NULL;
        ls_queue_add (&q, elem);
      }
  // Iterate on all legal superset of fusable sets.
  for (i = 3; q && i <= fgraph->vertex_count; ++i)
    {
      nq = NULL;
      while (q)
        {
          elem = ls_queue_get (&q);
          for_all_nodes(v, fgraph)
            {
              // Find the 'maximal' node in the element.
              for (j = 0; elem[j + 1]; ++j)
                ;
              // FFwd to the next node after the maximal one.
              while (v && elem[j]->id >= v->id)
                v = v->next;
              // No more node to test for.
              if (v == NULL)
                break;
              // extend the component with 'v'.
              elem[j + 1] = v;
              elem[j + 2] = NULL;
              if (ls_queue_contains(q, elem))
                continue;
              if (check_fuse(elem, fgraph))
                ls_queue_add (&nq, elem);
              else
                add_nonfuse_cst(elem, s, screen, program->nb_statements);
            }
        }
      q = nq;
    }
}


/*
 * Alternative construction of the fusion set.
 *
 * FGRAPH is expanded with ls_fsspace_expand_fusion_graph(), the system
 * is built from SCREEN, then completed by ls_fsspace_prune_oset() with
 * non-fusability constraints over more than two statements. The
 * expanded graph (and the payload of its edges) is released before
 * returning. DEPS and OPTIONS are not used.
 */
s_fm_system_t*
ls_fsspace_build_fset_v2 (s_graph_t* deps,
                          CandlProgram* program,
                          s_ls_options_t* options,
                          int* screen,
                          s_graph_t* fgraph)
{
  // Expanded view of the fusion graph.
  s_graph_t* expanded = ls_fsspace_expand_fusion_graph (program, fgraph);

  // Pairwise constraints come straight from the screen.
  s_fm_system_t* sys = ls_oset_build_system (program->nb_statements, screen);

  // Constraints of non-fusability between more than 2 statements.
  ls_fsspace_prune_oset(sys, fgraph, expanded, program, screen);

  // Release the expanded graph, edge payloads first.
  s_edge_t* edge;
  for_all_edges(edge, expanded)
    XFREE(edge->data);
  ls_graph_free (expanded);

  return sys;
}

s_fm_system_t*
ls_fsspace_build_prune_oset (s_graph_t* deps,
			     CandlProgram* program,
			     s_ls_options_t* options,
			     int* screen,
			     s_graph_t* fgraph)
{
  int i, j;
  s_fm_system_t* s;
  CandlDependence* dep;
  CandlDependence* dependences;
  CandlOptions* candl_opt;
  int nb_stmts = program->nb_statements;
  int* val;
  int* val2;

  // Compute pairwise-fusion, from RAR.
  // Criterion (1): if there's no dep (including RAR), don't fuse.
  candl_opt = candl_options_malloc ();
  candl_opt->scalar_privatization = 1;
  candl_opt->rar = 1;
  dependences = candl_dependence (program, candl_opt);
  candl_options_free (candl_opt);
/*   for (i = 0; i < nb_stmts * (nb_stmts - 1); ++i) */
/*     printf ("%d ", screen[i]); */
/*   printf("\n"); */

  for (i = 0; i < nb_stmts; ++i)
    for (j = i + 1; j < nb_stmts; ++j)
      {
	for (dep = dependences; dep; dep = dep->next)
	  if (dep->source->label == i || dep->source->label == j)
	    if (dep->target->label == j || dep->target->label == j)
	      if (dep->source != dep->target)
		break;
	if (! dep)
	  {
/* 	    printf("PRUNED (1): t %d, %d\n", i + 1, j + 1); */
	    val = &(screen[ls_oset_idx_t (nb_stmts, i + 1, j + 1) - 1]);
	    val2 = &(screen[ls_oset_idx_p (nb_stmts, i + 1, j + 1) - 1]);
	    if (*val == LS_OSET_UNSET)
	      *val = 0;
	    else if (*val == LS_OSET_ELIM && *val2 == LS_OSET_UNSET)
	      *val2 = 1;
	  }
      }

  candl_dependence_free (dependences);

  // Criterion 2: If there is an inner, loop-carried dep, for both s1
  // and s2, do not fuse (favor vectorization).
  candl_opt = candl_options_malloc ();
  candl_opt->scalar_privatization = 1;
  dependences = candl_dependence (program, candl_opt);
  candl_options_free (candl_opt);

  for (i = 0; i < nb_stmts; ++i)
    {
      for (dep = dependences; dep; dep = dep->next)
	if (dep->source == dep->target && dep->source->label == i)
	  if (dep->depth == dep->source->depth)
	    break;
      if (dep == NULL)
	continue;
      for (j = i + 1; j < nb_stmts; ++j)
	{
	  for (dep = dependences; dep; dep = dep->next)
	    if (dep->source == dep->target && dep->source->label == i)
	      if (dep->depth >= dep->source->depth - 1)
		break;
	  if (! dep)
	    {
/* 	      printf("PRUNED (2): t %d, %d\n", i, j); */
	      val = &(screen[ls_oset_idx_t (nb_stmts, i + 1, j + 1) - 1]);
	      val2 = &(screen[ls_oset_idx_p (nb_stmts, i + 1, j + 1) - 1]);
	      if (*val == LS_OSET_UNSET)
		*val = 0;
	      else if (*val == LS_OSET_ELIM && *val2 == LS_OSET_UNSET)
		*val2 = 1;
	    }
	}
    }
  candl_dependence_free (dependences);

  // Build OSet from the computed screen.
  s = ls_oset_build_system (program->nb_statements, screen);

  return s;
}

/*
 * Build the fusion set system from SCREEN and the fusion graph FGRAPH.
 *
 * The base system comes from ls_fsspace_build_prune_oset() when
 * OPTIONS->prune_oset is set, from ls_oset_build_system() otherwise.
 * Each vertex of FGRAPH whose 'walk' field is non-zero then gets its
 * component collected by a DFS (build_comp callback, through
 * FGRAPH->usr), and the sub-paths of that component are submitted to
 * check_fusible(). A rejected sub-path updates SCREEN, may add a
 * constraint to the system, and removes its first vertex from FGRAPH.
 *
 * The path array is left attached to FGRAPH->usr on return.
 * NOTE(review): the rejection branch indexes path[e] and builds its
 * constraint from path[0..e-b] rather than path[b..e]; bounds are not
 * checked -- left as is.
 */
s_fm_system_t*
ls_fsspace_build_fset (s_graph_t* deps,
                       CandlProgram* program,
                       s_ls_options_t* options,
                       int* screen,
                       s_graph_t* fgraph)
{
  int i, j;
  int b;
  int e;
  int length;
  s_fm_system_t* s;
  s_vertex_t* v;
  s_fm_vector_t* cst;
  s_vertex_t** path = XMALLOC(s_vertex_t*, fgraph->vertex_count);
  s_vertex_t** negclos;
  s_vertex_t** posclos;

  int sz = 0;
  int nb_vars = program->nb_statements * (program->nb_statements - 1);

  if (options->prune_oset)
    s = ls_fsspace_build_prune_oset (deps, program, options, screen, fgraph);
  else
    s = ls_oset_build_system (program->nb_statements, screen);

  // Count the screen entries different from -1, for the verbose report.
  for (i = 0; i < nb_vars; ++i)
    if (screen[i] != -1)
      ++sz;

  if (options->verbose)
    fprintf (options->out_file,
             "[LetSee] Oset: %d fixed (out of %d)\n", sz, nb_vars);

  fgraph->usr = path;
  for (v = fgraph->root; v; v = v->next)
    {
      if (v->walk == 0)
        continue;
      // Compute the current component.
      for (i = 0; i < fgraph->vertex_count; ++i)
        path[i] = NULL;
      ls_graph_dfs_from (fgraph, v, build_comp, NULL, NULL);
      for (length = 0; length < fgraph->vertex_count && path[length] != NULL;
           ++length)
        ;

      // Traverse the component.
      b = 0; e = 2;
      do
        {
          if (! check_fusible (deps, fgraph, path, b, e))
            {
              // Prune the graph.
              negclos = compute_negclos (deps, path[b]);
              posclos = compute_posclos (deps, path[e]);
              for (i = 0; negclos[i] != NULL; ++i)
                for (j = 0; posclos[j] != NULL; ++j)
                  {
                    screen[ls_oset_idx_p (program->nb_statements,
                                          negclos[i]->id + 1,
                                          posclos[j]->id + 1) - 1] = 1;
                    screen[ls_oset_idx_t (program->nb_statements,
                                          negclos[i]->id + 1,
                                          posclos[j]->id + 1) - 1] = 0;
                  }
              // The closure arrays are owned by this function.
              XFREE(negclos);
              XFREE(posclos);

              // Build the constraint.
              cst = fm_vector_alloc (s->nb_cols);
              fm_vector_set_ineq (cst);
              for (i = 0; i < e - b; ++i)
                {
                  fm_vector_assign_int_idx (cst, -1, ls_oset_idx_t
                                            (program->nb_statements,
                                             path[i]->id + 1,
                                             path[i + 1]->id + 1));
                }
              fm_vector_assign_int_idx (cst, e - b, cst->size - 1);
              if (ls_oset_needed_cst (cst, screen))
                fm_system_add_line (s, cst);
              else
                // Not inserted in the system: nobody else owns it.
                fm_vector_free (cst);
              // Remove path(b).
              ls_graph_remove_vertex (fgraph, path[b]);
              // Go to the next subpath to check for.
              ++b;
              if (e - b < 2)
                e = b + 2;
            }
          else
            ++e;
        }
      while (e < length);
    }

  return s;
}


/**
 * Hook for additional pruning constraints on the screen. Currently
 * empty: the arguments are ignored and nothing is modified.
 */
void
ls_fsspace_prune_structures(s_graph_t* deps,
                            CandlProgram* program,
                            CandlDependence* dependences,
                            s_ls_options_t* options,
                            int* screen)
{
  (void) deps;
  (void) program;
  (void) dependences;
  (void) options;
  (void) screen;
}



/**
 * \brief Compute the solution polytope.
 *
 * Allocates a screen of nb_statements * (nb_statements - 1) entries
 * initialised to LS_OSET_UNSET, builds the fusion graph, runs the
 * (currently empty) pruning hook, builds the FSet constraint system and
 * wraps it in a newly allocated space of dimension 1.
 *
 * When the system has more than 2000 columns, the integer check and the
 * compacted representation are skipped and the system is converted
 * directly to a solution. Otherwise the compacted solution is computed
 * (and optionally normalized with FM projection when
 * options->normalize_space is set) and then expanded.
 *
 * \param deps		Graph forwarded to the fusion graph / FSet builders.
 * \param program	Program; nb_statements sizes the screen.
 * \param dependences	Dependences forwarded to the builders.
 * \param options	Options; verbose, out_file and normalize_space are read.
 * \return		The newly allocated space. u_polyhedron is a
 *			NULL-terminated array with one solution.
 */
s_ls_space_t*
ls_fsspace_compute_solution (s_graph_t* deps,
			     CandlProgram* program,
			     CandlDependence* dependences,
			     s_ls_options_t* options)
{
  int i;
  int nb_vars = program->nb_statements * (program->nb_statements - 1);
  int* screen = XMALLOC(int, nb_vars);
  for (i = 0; i < nb_vars; ++i)
    screen[i] = LS_OSET_UNSET;

  // Build the fusion graph.
  if (options->verbose)
    fprintf (options->out_file, "[LetSee] Compute fusion graph\n");
  s_graph_t* F = ls_fsspace_build_fusiongraph (deps, program, dependences,
					       options, screen);

  // Add pruning constraints.
  ls_fsspace_prune_structures (deps, program, dependences, options, screen);

  // Build \FSet.
  if (options->verbose)
    fprintf (options->out_file, "[LetSee] Explore fusion graph\n");
  s_fm_system_t* s = ls_fsspace_build_fset (deps, program, options, screen, F);

  // NOTE(review): neither screen nor F is released in this function.
  // Whether the builders above keep a reference to them is not visible
  // from here -- confirm ownership before adding the matching frees.

  s_ls_space_t* space = NULL;

  if (s->nb_cols > 2000)
    {
      // Large system: no integer check, no compacted form.
      fprintf (options->out_file, "[LetSee] Warning: system has too many variables for integrity check\n");
      space = ls_space_alloc ();
      space->u_compacted = NULL;
      space->dimension = 1;
      space->u_polyhedron = XMALLOC(s_fm_solution_t*, 2);
      space->u_polyhedron[0] = fm_system_to_solution (s);
      space->u_polyhedron[1] = NULL;
      space->polyhedron = space->u_polyhedron[0];
      fm_system_free (s);
    }
  else
    {
      if (options->verbose)
	{
	  // Check for an integer point, then report the size of the
	  // compacted solution set.
	  if (! fm_piptools_check (s, FM_PIPTOOLS_INT))
	    assert(!"EMPTY solution set!!");
	  s_fm_compsol_t* cs = fm_compsol_init_sys (s);
	  // The report below treats cs->poly and st as possibly NULL, so
	  // neither is handed to the FM routines without a check.
	  s_fm_system_t* st = NULL;
	  if (cs->poly != NULL)
	    st = fm_solution_to_system (cs->poly);
	  fprintf (options->out_file,
		   "[LetSee] FSet: %d variables (%d total), %d constraints\n",
		   cs->poly ? cs->poly->size : 0,
		   s->nb_cols - 2, st ? st->nb_lines : 0);
	  if (st != NULL)
	    fm_system_free (st);
	  fm_compsol_free (cs);
	}

      space = ls_space_alloc ();
      space->u_compacted = XMALLOC(s_fm_compsol_t*, 2);
      space->u_compacted[0] = fm_compsol_init_sys (s);
      if (options->normalize_space)
	{
	  // Replace the compacted polyhedron by its projection computed
	  // by the FM solver.
	  s_fm_system_t* psyst =
	    fm_solution_to_system (space->u_compacted[0]->poly);
	  fm_solution_free (space->u_compacted[0]->poly);
	  fprintf (options->out_file,
		   "[LetSee] Normalize space with FM Projection...\n");
	  space->u_compacted[0]->poly =
	    fm_solver (psyst, FM_SOLVER_AUTO_SIMPLIFY);
	  fm_system_free (psyst);
	}
      space->u_compacted[1] = NULL;
      space->dimension = 1;
      space->u_polyhedron = XMALLOC(s_fm_solution_t*, 2);
      space->u_polyhedron[0] = fm_compsol_expand (space->u_compacted[0]);
      space->u_polyhedron[1] = NULL;
      fm_system_free (s);
    }

  return space;
}



/**
 * \brief Build the set of legal fusion structures.
 *
 * Preprocesses the dependence list, builds a graph with one vertex per
 * statement and one edge per dependence, computes the solution space
 * from that graph, and optionally checks it for an integer point.
 * The preprocessed dependence list is always pretty-printed on stdout.
 *
 * The process exits with status 1 if the PIP check finds no integer
 * point in one of the checked polyhedra.
 *
 * \param program	Program; nb_statements and statement[] are read.
 * \param dependences	Dependence list handed to the preprocessing step.
 * \param options	Options; verbose and out_file are read here.
 * \return		The computed space, with polyhedron set to
 *			u_polyhedron[0].
 */
s_ls_space_t*
ls_fsspace_build (CandlProgram* program,
		  CandlDependence* dependences,
		  s_ls_options_t* options)
{
  int i;
  s_ls_space_t* space = NULL;
  s_graph_t* graph;
  CandlDependence* tmp;

  if (options->verbose)
    fprintf(options->out_file, "[LetSee] Build legal fusion structures\n");

  // Preprocess the dependence graph.
  CandlDependence* depgraph =
    ls_schedspace_depgraph_preprocess (program, dependences);
  candl_dependence_pprint (stdout, depgraph);

  // Build the dependence graph from Candl's representation. The vertex
  // table maps a statement label to its vertex; ids are 0, 1, 2, ...
  graph = ls_graph_alloc ();
  s_vertex_t** vertex = XMALLOC(s_vertex_t*, program->nb_statements);
  for (i = 0; i < program->nb_statements; ++i)
    vertex[i] = ls_graph_create_vertex (graph, program->statement[i], i);

  for (tmp = depgraph; tmp; tmp = tmp->next)
    ls_graph_create_edge (graph, vertex[tmp->source->label],
			  vertex[tmp->target->label], tmp);

  // The table is only used to wire the edges. Release the pointer array
  // itself (not the vertices it points to) to avoid leaking it.
  free (vertex);

  // Compute the solution polytope.
  space = ls_fsspace_compute_solution (graph, program, depgraph, options);

  if (options->verbose)
    fprintf(options->out_file, "[LetSee] All systems solved\n");

  // Initial PIP check. u_polyhedron is NULL-terminated, so stop at the
  // terminator instead of reading past it.
  // NOTE(review): the bound is space->size, but the solution builder in
  // this file only sets space->dimension -- confirm which field is meant.
  if (space->u_polyhedron[0]->size < 2000)
    for (i = 0; i < space->size && space->u_polyhedron[i] != NULL; ++i)
      if (! fm_piptools_check_sol (space->u_polyhedron[i], FM_PIPTOOLS_INT))
	{
	  if (options->verbose)
	    fprintf(options->out_file,
		    "[LetSee] Ending computation: no point at dimension %d\n",
		    i + 1);
	  exit (1);
	}

  // Internal conflict/dependence graph is no longer needed.
  ls_graph_free (graph);

  // Copy space for compatibility with former heuristics.
  space->polyhedron = space->u_polyhedron[0];

  return space;
}
