#!/usr/bin/env python

# Jacob Joseph
# 11 Nov 2008

# A quick scatterplot of bitscore vs e-value

from matplotlib import pyplot
from DurandDB import blastq
import math

def fetch_scores( bq, br_id):
    """Fetch (bit_score, scaled -log10 e-value) rows for one BLAST run.

    bq    -- query object providing fetch_db_size(br_id) and a 'dbw'
             attribute with a fetchall(query) method.
    br_id -- integer run identifier; used both to look up the database
             size and to filter blast_hit_symmetric.

    Returns whatever bq.dbw.fetchall() returns for the query: one row
    per hit, holding bit_score and -log10(e_value / dbsize + 1E-200).
    """
    # The database size is looked up first and interpolated into the
    # query as a quoted literal, exactly as the SQL text below spells it.
    db_size = bq.fetch_db_size( br_id)

    run_query = bq.dbw.fetchall

    # Both placeholders are %d, so only integers can be substituted.
    # The 1E-200 term presumably keeps log10() away from zero e-values.
    template = """SELECT bit_score, -log10(e_value / '%d' + 1E-200)
    FROM blast_hit_symmetric
    WHERE br_id=%d"""

    return run_query( template % (db_size, br_id))

def unique_scores( slist):
    """Collapse duplicate (bit, e) pairs and count how often each occurs.

    slist -- iterable of hashable 2-tuples (bit, e).

    Returns a tuple of three parallel lists (bit_list, e_list, cnt_list):
    element i of each list describes one distinct pair and the number of
    times it appeared in slist.  The order of the distinct pairs follows
    dict iteration order.  Empty input yields ([], [], []).
    """
    cnt_dict = {}
    for tup in slist:
        cnt_dict[tup] = cnt_dict.get( tup, 0) + 1

    bit_list = []
    e_list = []
    cnt_list = []
    # items() instead of iteritems(): iteritems() was removed in
    # Python 3, while items() is available on both Python 2 and 3.
    for ((bit, e), cnt) in cnt_dict.items():
        bit_list.append( bit)
        e_list.append( e)
        cnt_list.append( cnt)

    return (bit_list, e_list, cnt_list)

if __name__ == "__main__":
    # Single source of truth for the BLAST run being plotted; it feeds
    # both the query and the plot title so the two cannot drift apart.
    br_id = 91

    bq = blastq.blastq()

    scores = fetch_scores( bq, br_id)

    # Reduce the raw hits to distinct (bit, e) points plus multiplicity.
    (bit, e, cnt) = unique_scores( scores)

    # Color each point by log10 of how many hits share that exact pair.
    logcnt = [ math.log10(c) for c in cnt]

    pyplot.scatter( bit, e, c=logcnt, marker='o')
    pyplot.axis('tight')
    pyplot.grid(color='gray', alpha=0.5)
    pyplot.colorbar()
    pyplot.xlabel('Bit-score')
    # Label mirrors the expression computed by the SQL in fetch_scores.
    pyplot.ylabel('-log10(e_value/dbsize + 1E-200)')
    pyplot.subplots_adjust(left=0.1, bottom=0.05, right=0.98, top=0.95)
    pyplot.title("E-value vs Bit score (br_id: %d)" % br_id)
    # NOTE(review): no pyplot.show()/savefig() call is visible in this
    # block; presumably the script is run from an interactive session
    # -- confirm.
