#!/usr/bin/env python

# Jacob Joseph
# 11 Dec 2009

# Build an HTML page to navigate hierarchical clusters

import sys, cProfile, time
from JJcluster.cluster_obj import cobj
from JJcluster.describe import describe

class browser(describe):
    """Render a hierarchical clustering run as one navigable HTML page.

    Specializes ``describe`` by always requesting the 'hierarchical'
    clustering type.  The methods below use ``self.CR`` and ``self.fq``
    as well as ``html_run_header``, ``html_sequence`` and
    ``cluster_stats``, none of which are defined in this class;
    presumably they are provided by ``describe`` -- confirm there.
    """

    def __init__(self, cluster_run_id, 
                 cacheq=False,
                 family_set_name=None):
        """Initialize the underlying ``describe`` object.

        cluster_run_id  -- id of the clustering run to browse
        cacheq          -- forwarded unchanged to ``describe``
        family_set_name -- forwarded unchanged to ``describe``
        """
        describe.__init__(self, cluster_run_id = cluster_run_id,
                          clustering_type = 'hierarchical',
                          cacheq = cacheq,
                          family_set_name = family_set_name)

    def html_hierarchy(self, orgarg="", family_abbrev=None):
        """Return the cluster tree as an HTML document (a string).

        orgarg        -- text appended verbatim to every cluster link
                         URL; also passed to ``html_run_header``.
        family_abbrev -- when not None, only the subtree under the
                         common parent of that family's sequences is
                         rendered; otherwise the whole structure is.

        One line is emitted per node, prefixed by its depth and by
        links to its parent (P) and, for clusters, to two of its
        children (L, R).  Edge statistics are included only for
        clusters of at most 1000 members.
        """

        # constrain left and right to the extents of the family in the
        # tree.
        if family_abbrev is not None:
            family_members = self.fq.fetch_family_seqs( family_abbrev)
            parent_id = self.CR.get_common_parent( seq_ids=family_members)
            parent_row = self.CR.get_cluster_row( parent_id)
            # To show more context, step upward from here by re-fetching
            # the row of parent_row['parent_id'].

            # Single-argument print(...) produces the same text under
            # Python 2 and also parses under Python 3.
            print("Common parent: %s %s" % (parent_id, parent_row))
            print("Nodes: %d" % (parent_row['rgt'] - parent_row['lft'],))

            root = self.CR.fetch_structure( left_lim=parent_row['lft'],
                                            right_lim=parent_row['rgt'])
        else:
            root = self.CR.fetch_structure()

        # Fragments are collected and joined once at the end rather
        # than repeatedly extending one ever-growing string.
        parts = ["""<html><body style="white-space: nowrap;">\n"""]

        parts.append( self.html_run_header(orgarg=orgarg))

        s_prefix_sequence = """%(level)0.3d
<a href="#%(parent_id)s">P</a>
<SPACER TYPE=BLOCK WIDTH=%(space)d>"""

        s_cluster = """%(level)0.3d
<a href="#%(parent_id)s">P</a>
<a href="#%(left_id)d">L</a> <a href="#%(right_id)d">R</a>
<SPACER TYPE=BLOCK WIDTH=%(space)d>
<a name="%(cluster_id)d" href="http://quantbio-tools.princeton.edu/cgi-bin/CE?url=http://diatom.compbio.cs.cmu.edu:8001/%(cr_id)d/%(cluster_id)d%(orgarg)s">
<b>Cluster %(cluster_id)d</a>:
Cluster Similarity: %(clustsim)0.4f,
Size: %(num_nodes)d</b>,
Density: %(density)0.4f,
J: %(J)0.4f,
Edges: %(num_edges)d,
Frac. Edges: %(frac_edges)0.4f,
Mean: %(mean)0.4f(%(stdev)0.4f)<br>\n"""

        s_cluster_large = """%(level)0.3d
<a href="#%(parent_id)s">P</a> <a href="#%(left_id)d">L</a> <a href="#%(right_id)d">R</a>
<SPACER TYPE=BLOCK WIDTH=%(space)d>
<a name="%(cluster_id)d" href="http://quantbio-tools.princeton.edu/cgi-bin/CE?url=http://diatom.compbio.cs.cmu.edu:8001/%(cr_id)d/%(cluster_id)d%(orgarg)s"><b>Cluster %(cluster_id)d</a>: Cluster Similarity: %(clustsim)0.4f, Size: %(num_nodes)d</b>, J: %(J)0.4f<br>\n"""

        # work around emacs lingering highlighting of quote in triple quotes "

        # Depth-first walk with an explicit stack of
        # (level, cluster, parent_size, parent_id) tuples, where level
        # is the depth used for indentation.
        stack = [(0, root, None, None)]
        while stack:
            (level, clust, parent_size, parent_id) = stack.pop()

            # We're at a leaf cluster
            if 1 == clust.right() - clust.left():
                seq_id = clust.items()[0]
                parts.append( s_prefix_sequence % {'level': level,
                                                   'space': level*4 + 35,
                                                   'parent_id': parent_id})
                parts.append( self.html_sequence( seq_id))
                continue

            cluster_id = clust.cluster_id()
            cluster_size = len( self.CR.fetch_cluster( cluster_id))

            # Push children largest-first so that smaller clusters
            # (esp singletons) are popped, and therefore printed, first.
            # NOTE(review): the sort is in place; if items() hands back
            # the cluster's own list, the L/R links below follow this
            # sorted order rather than the stored order -- confirm.
            children = clust.items()
            children.sort(key=lambda a: a.right()-a.left(), reverse=True)

            for child in children:
                if isinstance(child, cobj):
                    stack.append( (level + 1, child, cluster_size, cluster_id))

            # The root has no parent; use its own size so that J is 0.
            if parent_size is None: parent_size = cluster_size

            # edge statistics take a while to calculate, so
            # calculate them only for smaller clusters
            if cluster_size > 1000:
                template = s_cluster_large
                edge_fields = {}
            else:
                edge_stats = self.cluster_stats(cluster_id)
                template = s_cluster
                edge_fields = {'num_edges': edge_stats['num_edges'] / 2,
                               'frac_edges': edge_stats['frac_edges'],
                               'density': edge_stats['density'],
                               'mean': edge_stats['mean'],
                               'stdev': edge_stats['stdev']}

            # Re-fetched after the sort on purpose (see the note above).
            branches = clust.items()

            fields = {'level': level,
                      'space': level*4,
                      'cluster_id': cluster_id,
                      'clustsim': 1-clust.distance(),
                      'num_nodes': cluster_size,
                      'J': float(parent_size - cluster_size) / cluster_size,
                      'left_id': branches[0].cluster_id(),
                      'right_id': branches[1].cluster_id(),
                      'parent_id': parent_id,
                      'cr_id': self.CR.cr_id,
                      'orgarg': orgarg
                      }
            fields.update(edge_fields)

            parts.append( template % fields)

        parts.append("</body></html>\n")
        return "".join(parts)



class runparam:
    """Plain record of the parameters associated with a cluster run.

    Stores the four constructor arguments unchanged as the attributes
    br_id, nc_id, stype and set_id; each defaults to None.
    """
    def __init__(self, br_id=None, nc_id=None,
                 stype=None, set_id=None):
        (self.br_id, self.nc_id,
         self.stype, self.set_id) = (br_id, nc_id, stype, set_id)
    

if __name__ == "__main__":
    # Command line: cr_id set_id family_set_name [family_abbrev]
    # Writes the rendered hierarchy to a dated file under figures/.
    if len(sys.argv) < 4:
        sys.exit("usage: %s cr_id set_id family_set_name [family_abbrev]"
                 % sys.argv[0])

    cr_id = int(sys.argv[1])
    set_id = int(sys.argv[2])   # only used to label the output file
    family_set_name = sys.argv[3]
    family_abbrev = sys.argv[4] if len(sys.argv) == 5 else None  # used to select a subtree

    date = time.strftime('%Y%m%d')
    
    # FIXME: workaround for not storing br_id, nc_id, set_id in a
    # queryable field
    # cr_id_map = {
    #         70: runparam(100, 746, 'nc_score', 105),  # full set
    #         71: runparam(97, 750, 'nc_score', 105),   # cluster
    #         72: runparam(97, 746, 'nc_score', 107),   # full set blast, new jan10 set
    #         73: runparam(104, 777, 'nc_score', None), # 12 species, not symmetric
    #         74: runparam(104, 777, 'nc_score', 109),  # human and mouse only, not symmetric
    #         75: runparam(104, 779, 'nc_score', ),     # 12 species, symmetric
    #         76: runparam(105, 780, 'nc_score', ),     # 48 species, not symmetric
    #         77: runparam(104, 779, 'nc_score', 109),  # Human and mouse only.  Symmetric
    #         78: runparam(104, 779, 'nc_score', 109),  # Human and mouse only.  Symmetric.  Single linkage
    #         79: runparam(104, 779, 'nc_score', 111),  # Yeast only.  Symmetric
    #         80: runparam(104, 779, 'nc_score', 111),  # Yeast only.  Symmetric. Single linkage
    #         81: runparam(104, 779, 'nc_score', 112),  # Human only.  Symmetric.
    #         82: runparam(105, 781, 'nc_score', 112),  # Human only.  Symmetric.  Not compositional
    #         }
    
    b = browser(cluster_run_id = cr_id,
                family_set_name=family_set_name,
                cacheq=True)

    s = b.html_hierarchy(orgarg="&o=h",
                         family_abbrev=family_abbrev)
    
    # family_abbrev is formatted with %s, so an omitted abbreviation
    # shows up in the file name as "None".
    out_path = "figures/%s_browser_cr_id_%d_set_id_%d_%s_%s.html" % (
        date, cr_id, set_id, family_set_name, family_abbrev)

    # The context manager closes the file even if the write fails.
    with open(out_path, 'w') as fd:
        fd.write(s)
