#!/usr/bin/python

import sys
import re
import struct
import Bio.SearchIO

# All three positional arguments are required.  Stop here with a usage
# message (written to stderr, exit status 1) rather than carrying on and
# failing later with an IndexError when sys.argv is indexed.
if len(sys.argv) <= 3:
    sys.exit("Usage: %s twin_output binary_optical_map output.psl" % sys.argv[0])

# Layout of one record in the binary optical map file: a single C int in
# native byte order.
item = struct.Struct("i")

def make_hsp(contig_id, contig_seq, opt_startfrag, opt_genome_locus, optmap, size, strand):
    """Build a one-fragment Bio.SearchIO.HSP for a contig placed on the optical map.

    The query side covers 0..size and carries `strand` as its strand; the hit
    side covers opt_genome_locus..opt_genome_locus + size.  `contig_id` becomes
    the fragment's query id.  `contig_seq`, `opt_startfrag` and `optmap` are
    accepted for the caller's convenience but are not used here.

    Returns the populated HSP object.
    """
    fragment = Bio.SearchIO.HSPFragment(query_id=contig_id)
    fragment.query_start = 0
    fragment.query_end = size
    fragment.hit_start = opt_genome_locus
    fragment.hit_end = opt_genome_locus + size
    fragment.query_strand = strand

    alignment = Bio.SearchIO.HSP(fragments=[fragment])

    # Alignment statistics, applied in this order.  Everything except the
    # two size-valued entries is reported as zero.
    statistics = (
        ("match_num", size),  # FIXME
        ("mismatch_num", 0),
        ("match_rep_num", 0),
        ("n_num", size),
        ("query_gapopen_num", 0),
        ("query_gap_num", 0),
        ("hit_gapopen_num", 0),
        ("hit_gap_num", 0),
        ("ident_num", 0),
        ("gapopen_num", 0),
        ("gap_num", 0),
    )
    for attribute, value in statistics:
        setattr(alignment, attribute, value)

    return alignment

def load_optmap(fname):
    """Return a list of the frags for a binary opt map file fname.

    The file is read as a sequence of native-byte-order C ints, one per
    fragment.  Trailing bytes that do not make up a complete record are
    ignored.  As a diagnostic, the number of distinct fragment values is
    written to stdout.
    """
    record = struct.Struct("i")
    frags = []
    # Binary mode is required: text mode may translate or decode the raw
    # bytes and corrupt the integers.  The with-block closes the handle.
    with open(fname, "rb") as f:
        word = f.read(record.size)
        while len(word) == record.size:
            frags.append(record.unpack(word)[0])
            word = f.read(record.size)
    sys.stdout.write("alphabet size is  %d\n" % len(set(frags)))
    return frags

def _write_contig_alignments(hsps, contig_seq, hit_id, hit_len, handle):
    """Write the HSPs collected for one contig to `handle` as headerless PSL."""
    hit = Bio.SearchIO.Hit(hsps=hsps)
    hit.id = hit_id
    hit.seq_len = hit_len
    qresult = Bio.SearchIO.QueryResult(hits=[hit])
    qresult.seq_len = sum(contig_seq)  # FIXME
    Bio.SearchIO.write(qresult, handle, header=False, format='blat-psl')


optmap_fname = sys.argv[2]
optmap = load_optmap(optmap_fname)
optmap_size = sum(optmap)

# HSPs collected for the contig currently being read.
hsps = []

# A contig header line.  Each escaped space is followed by a comment so the
# pattern does not depend on invisible trailing whitespace.
contig_line = re.compile(r"""^Matching\ contig\ #
                             ([^:]+)               # contig name
                             :\(ignored\           # literal ":(ignored "
                             (\d+)                 # left end
                             \)
                             (.*)                  # interior frags
                             \(ignored\            # literal "(ignored "
                             (\d+)                 # right end
                             \)""", re.X)

# An optical map segment alignment line.
alignment_line = re.compile(r"^Aligned at optical frag \(sa (\d+)\)\. Locus (\d+)\.")

strand = 1 # keeps track of whether twin is in the forward alignment detection state or backward

with open(sys.argv[3], "w") as PSLfile:
    with open(sys.argv[1]) as twin_output:
        for line in twin_output:
            line = line.strip()

            if "backward alignment:" in line:
                strand = -1

            # try to parse line as a contig line
            contig_mobj = contig_line.search(line)
            if contig_mobj:
                # a new contig always starts in the forward state
                strand = 1
                # emit whatever was collected for the previous contig
                if hsps:
                    _write_contig_alignments(hsps, cur_contig_seq, optmap_fname,
                                             optmap_size, PSLfile)
                    hsps = []
                # this will mark a new query
                cur_contig_id = contig_mobj.group(1)
                left_end = [int(contig_mobj.group(2))]
                # split() without an argument copes with an empty interior
                # and with runs of whitespace between fragments
                interior_frags = [int(n) for n in contig_mobj.group(3).split()]
                right_end = [int(contig_mobj.group(4))]

                cur_contig_seq = left_end + interior_frags + right_end

            # try to parse line as an optical map segment alignment
            opmap_mobj = alignment_line.search(line)
            if opmap_mobj:
                hsps.append(make_hsp(cur_contig_id, cur_contig_seq,
                                     int(opmap_mobj.group(1)),
                                     opt_genome_locus=int(opmap_mobj.group(2)) - cur_contig_seq[0],
                                     optmap=optmap,
                                     size=sum(cur_contig_seq),
                                     strand=strand))

    # The last contig has no following contig line to trigger its output,
    # so write its alignments here.
    if hsps:
        _write_contig_alignments(hsps, cur_contig_seq, optmap_fname,
                                 optmap_size, PSLfile)
        hsps = []




