// Copyright (C) 2014 Martin Muggli  All Right Reserved.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with this program.  If not, see http://www.gnu.org/licenses/ .


// Program-wide tuning parameters and diagnostics. They are defined ahead of
// the #include of "mysuffix.hpp" below, presumably so that header can refer
// to them -- TODO confirm against mysuffix.hpp.

// Reset to 0 by main before each contig and printed when a contig gets no
// alignment; presumably updated by the search code in mysuffix.hpp.
int maxdepth;
// Reset to 10000 by main before each contig and printed as "threshold" after
// matching; presumably lowered by the search code in mysuffix.hpp.
float minthreshold;
// Value of --fval ("precision/recall tradeoff"); parseargs defaults it to 4.0.
float f_thresh;
// Value of --search_radius; parseargs defaults it to 1000.
int search_radius;
// Value of --largest_maybe_frag; parseargs defaults it to 1000.
int largest_maybe_frag;
// True when --verbose was given on the command line.
bool verbose;
// Not referenced by the code in this file; presumably used by mysuffix.hpp.
int aligned_frags;

// Value of --smallest_frag_length (default 250); main drops in-silico
// fragments shorter than this before matching.
int smallest_frag_length;
//int *optmap;

// Not referenced by the code in this file. NOTE(review): the name suggests a
// standard deviation for optical-map fragment sizes -- confirm in mysuffix.hpp.
static const int OM_STDDEV = 150;
#include <sdsl/suffix_arrays.hpp>
#include <iostream>
#include <map>
#include <string>
#include <vector>
#include <algorithm>
// Lookup table filled by main: entry k is the running sum of fm_index.text[0..k],
// used to map a fragment index to a locus.
std::vector<unsigned int> optfrag2locus;
#include <program_options.hpp>


// Suffix array over the optical map; main fills it with start indices and
// sorts it with mysortclass. Defined before "mysuffix.hpp" is included,
// presumably because that header refers to it -- TODO confirm.
std::vector<int> sa;
#include "mysuffix.hpp"

//#include "loader.h"



namespace po = boost::program_options;
using namespace std;
using namespace sdsl;

// One in-silico digested contig, as read from a record of the silico map file.
class Contig {
public:
    // Contig identifier (first token of a record).
    std::string id;
    // Contig length as stated in the record. Zero-initialised so that a
    // default-constructed Contig never carries an indeterminate value.
    int length = 0;
    // Restriction-site offsets within the contig, in file order until main
    // sorts them.
    std::vector<int> sites;

};

// Ordering predicate for std::sort over contigs.
// Returns true when `a` has strictly more restriction sites than `b`, so a
// sort using it places the contigs with the most sites first (note that,
// despite the "_lt" suffix, this is a descending order).
bool contigs_lt(const Contig& a, const Contig& b)
{
    const bool a_has_more_sites = b.sites.size() < a.sites.size();
    return a_has_more_sites;
}

// C-linkage loader implemented outside this file.
// main calls it with the FM-index size and the optical map path, casts the
// returned pointer to int* and reads the optical map fragments through it.
// NOTE(review): the FIXME in main indicates the buffer is mmap'd and never
// unmapped; the exact meaning of `elems` and the failure convention are not
// visible here -- confirm against the loader's implementation.
extern "C" {
    char* loadfile(int elems, const char* fname);
}

// FIXME: soma code doesn't use these types yet!

// Integer type of one symbol of the indexed text.
typedef int t_fmsym;

// Size in bytes of one symbol. main currently passes the literal 4 to
// sdsl::construct instead of this constant (see the commented-out argument there).
const unsigned FMSYMSIZE = sizeof(t_fmsym);
    

void load_contigs(vector<Contig>  &contigs,
//                  map<string, int> &contig_id2length,
                  std::string &silico_fname)
{
    ifstream silico_file(silico_fname.c_str());
    map<string, vector<int> > sequence2sites; // in-silico sequence map container

    string id; int length; int num_sites;
    while(silico_file >> id >> length >> num_sites) {
        Contig contig;
        contig.length = length;
        contig.id = id;
        //contig_id2length[id] = length;

        for (int i = 0; i < num_sites; i++) {
            
            int site; silico_file >> site;
            contig.sites.push_back(site);
            sequence2sites[id].push_back(site);
        }
        contigs.push_back(contig);
    }
    silico_file.close();

    sort(contigs.begin(), contigs.end(), contigs_lt); 
}


int parseargs(int argc, char *argv[], std::string &omap, std::string &silico) 
{
// Declare the supported options.
    po::options_description desc("Allowed options");
    desc.add_options()
        ("help", "produce help message")
        ("verbose", "show successful steps in approximate backtracking search")
        ("opt_map", po::value<string>(), "REQUIRED set optical map binary file")
        ("silico_map", po::value<string>(), "REQUIRED set in-silico digested contigs file")
        ("fval", po::value<float>(), "precision/recall tradeoff (default 4.0)")
        ("search_radius", po::value<int>(), "radius around silico fragment size that should be searched for optmap candidates (i.e. tollerance) (default 1000)")
        ("largest_maybe_frag", po::value<int>(), "size below which TWIN should consider discarding in-silico digested fragments (default 1000)")
        ("smallest_frag_length", po::value<int>(), "size below which in-silico digested fragments should be always discarded (default 250)")
        ;

    po::variables_map vm;
    po::store(po::parse_command_line(argc, argv, desc), vm);
    po::notify(vm);    

    if (vm.count("help")) {
        cout << desc << "\n";
        return 0;
    }

    verbose = false;
    if (vm.count("verbose")) {
        verbose = true;
    }

    if (vm.count("opt_map")) {
        omap = vm["opt_map"].as<std::string>() ;
    } else {
        cout << "No optical map specified" << std::endl;
        cout << desc << "\n";
        return 0;
    }

    if (vm.count("silico_map")) {
        silico = vm["silico_map"].as<std::string>() ;
    } else {
        cout << "No in-silico map specified" << std::endl;
        cout << desc << "\n";
        return 0;
    }

    if (vm.count("fval")) {
        f_thresh = vm["fval"].as<float>();
    } else {
        f_thresh = 4.0;
    }
    cout << "F value was set to " 
         << f_thresh << ".\n";

    if (vm.count("search_radius")) {
        search_radius = vm["search_radius"].as<int>();
    } else {
        search_radius = 1000;
    }

    if (vm.count("largest_maybe_frag")) {
        largest_maybe_frag = vm["largest_maybe_frag"].as<int>();
    } else {
        largest_maybe_frag = 1000;
    }

    if (vm.count("smallest_frag_length")) {
        smallest_frag_length = vm["smallest_frag_length"].as<int>();
    } else {
        smallest_frag_length = 250;
    }


    cout << "search radius value was set to " 
         << search_radius << ".\n";

    return 1;

}



// Comparison functor for sorting suffix start positions of the optical map.
//
// operator()(i, j) returns true when the suffix starting at index i orders
// before the suffix starting at index j. Suffixes are compared symbol by
// symbol; index == optmap_size stands for an imaginary terminating "$" that
// orders AFTER every real symbol, so a suffix that is a proper prefix of
// another one sorts after it and the empty suffix sorts last.
//
// The comparison is iterative, so its stack usage stays constant no matter how
// long the common prefix of the two suffixes is.
struct mysortclass {
    // optmap_      - array of at least optmap_size_ symbols; not owned.
    // optmap_size_ - number of symbols; also the index of the terminator.
    mysortclass(int* optmap_, int optmap_size_)
        : optmap(optmap_), optmap_size(optmap_size_) {}

    int* optmap;
    int optmap_size;

    bool operator() (int i, int j) const {
        for (;; ++i, ++j) {
            // handle the imaginary terminating "$"
            if (i == optmap_size) return false;
            if (j == optmap_size) return true;

            // the first differing symbol decides; equal symbols move on
            if (optmap[i] != optmap[j]) {
                return optmap[i] < optmap[j];
            }
        }
    }
};



int main(int argc, char *argv[])  {

    std::string opt_map;
    std::string silico_map;
    if (!parseargs(argc, argv, opt_map, silico_map)) {
        return 1;
    }

    maxdepth = 0;
    csa_wt<wt_int<>, 
           64, 
           64, 
           sa_order_sa_sampling<>, 
           int_vector<>, 
           int_alphabet<>
    > fm_index;

     sdsl::construct(fm_index, opt_map.c_str(), 4);//FMSYMSIZE);
    std::cout << "FM-Index indexed " << fm_index.size() - 1 << " elements." 
              << std::endl;


    std::cout << "Constructing suffix array..." << std::endl;

    

    int fd;
    int *optmap = (int*) loadfile(fm_index.size(), opt_map.c_str()); //FIXME: don't forget to un mmap this

    for (int i = 0; i < fm_index.size() + 1; ++i) {
        sa.push_back(i);
    }
    mysortclass mysortobject(optmap, fm_index.size());

    std::sort(sa.begin(), sa.end(), mysortobject);


    std::cout << "Done constructing suffix array." << std::endl;
    

    // build a lookup table to map from fragment index to locus in the genome

    int locus = 0;
    //FIXME: this could be slow to lookup in the text this way; there may be a better fmindexy way 
    //       traverse the text
    std::cout << "constructing iBWT LUT..." << std::endl;
    for(int jk = 0; jk < fm_index.size(); ++jk) {
        locus +=  fm_index.text[jk];
        optfrag2locus.push_back(locus);
        //std::cout << locus << std::endl;
    }
    // another in-silico sequence map container
    std::cout << "Done constructing iBWT LUT." << std::endl;
    vector<Contig> contigs; 

    std::cout << "Loading contigs..." << std::endl;
    load_contigs(contigs,  silico_map);
    std::cout << "Done loading contigs." << std::endl;


    
    //////////////// iterate through the insilico maps ////////////////////////

    std::cout << "Processing " << contigs.size() 
              << " in-silico digested contigs..." << std::endl;
    int placedcount = 0;
    int accum = 0;
    int procd_count = 0;
    int attempts = 0;
    int skipped = 0;
    vector<Contig>::iterator contig_it;
    vector<Contig>::iterator contig_it_end = contigs.end();
    for (    contig_it = contigs.begin(); 
             contig_it != contig_it_end; 
             contig_it++) {
        maxdepth = 0;
        minthreshold = 10000;
        if (procd_count % 1000 == 0) {
            std::cout << "processing contig " 
                      << procd_count << std::endl;
        }

        procd_count++;

        sort(contig_it->sites.begin(), contig_it->sites.end()); 

        // convert offsets of restriction sites to in-silico fragment sizes, 
        // only when larger than threshold into sequence_pieces
        vector<int> sequence_pieces; 
        int left_tail = contig_it->sites[0];
        //sequence_pieces.push_back(contig_it->sites[0]); // put in the first piece


        for (    vector<int>::iterator it2 = contig_it->sites.begin()+1; 
                 it2 != contig_it->sites.end(); 
                 it2++) {
            if (*it2-*(it2-1) >=  smallest_frag_length) {
                sequence_pieces.push_back(*it2-*(it2-1));
            }
        }

        //sequence_pieces.push_back(contig_id2length[contig_it->first] 
        //    - contig_it->sites[contig_it->sites.size()-1]); // put the last piece in
        
        int right_tail = contig_it->length 
            - contig_it->sites[contig_it->sites.size()-1];

        if (sequence_pieces.size() == 0) {
            skipped++;
            continue;
        }

        // now convert this to an array, 
        // because sdsl seems to work okay with c-style arrays

        int* seqs = (int*) malloc(sizeof(int)*(sequence_pieces.size() + 1));
        int* seqs_rev = (int*) malloc(sizeof(int)*(sequence_pieces.size() + 1));
        int offset2 = 0;
        std::cout << "Matching contig " << contig_it->id << ":";
        std::cout << "(ignored " << left_tail << ") ";
        for (    vector<int>::iterator it2 = sequence_pieces.begin(); 
                 it2 != sequence_pieces.end();  
                 it2++) {        
            seqs[offset2] = *it2;
            seqs[sequence_pieces.size() - 1 - offset2] = *it2;
            offset2++;
            std::cout << *it2 << " ";
        }
        std::cout << "(ignored " << right_tail << ") ";
        std::cout << std::endl;
        attempts++;
        // forward alignments
//        int fwalignments = sdsl::count(fm_index, seqs, seqs + offset2);
        int fwalignments = mycount1(fm_index, 
                                   sequence_pieces.begin(), 
                                   sequence_pieces.end());
        if (fwalignments > 0) {
            accum += fwalignments;
            //std::cout << "(alignments found " << alignments << ")";
        }
        std::cout << "backward alignment:" << std::endl;
        // backward alignments

        int bwalignments = mycount1(fm_index, 
                                   sequence_pieces.rbegin(), 
                                    sequence_pieces.rend());
        if (bwalignments > 0) {
            accum += bwalignments;
            //std::cout << "(alignments found " << alignments << ")";
        }
        std::cout << " threshold: " << minthreshold <<std::endl;
        if (fwalignments > 0 || bwalignments > 0) {
            placedcount++;
            //std::cout << "found_alignments_for " << contig_it->first << std::endl;
        } else {
            std::cout << "no alignments found. maxdepth = " << maxdepth ;
            // for (int i : sequence_pieces)
            //     std::cout << i << "\t";
            std::cout << std::endl;
        }
        free(seqs);
        free(seqs_rev);

        //std::cout << std::endl;
    }
    std::cout << "placed somewhere: " << placedcount 
              << " total alignments: " << accum << std::endl;
    std::cout << "attempts: " << attempts << " skipped:" << skipped << std::endl;

        csa_wt<wt_int<>, 
           64, 
           64, 
           sa_order_sa_sampling<>, 
           int_vector<>, 
           int_alphabet<>
               >::iterator fmi = fm_index.begin(), fme=fm_index.end();
        int ijk;
        for(;fmi != fme; ++fmi) {
            ijk++;
        }

//    std::cout << "maxdepth " << maxdepth << std::endl;
    //  sdsl::write_structure<HTML_FORMAT>(fm_index,"fm_index-file.sdsl.html");
}










