print "Loading molecular biology results,",
#Load the molecular biology results (second column)
#for each patient into a Python dictionary, using the
#patient ID as the key (first column)
input = open("ALL_mol_biol.tsv")
lines = input.readlines()
input.close()
mol_biol = dict()
for line in lines :
    #Strip any trailing newline
    if line[-1] == "\n" : line = line[:-1]
    #Break up the line using the tabs
    parts = line.split("\t")
    #Check there are exactly two fields...
    assert len(parts)==2
    #Store the patients mol biol results:
    mol_biol[parts[0]] = parts[1]
print "Done"

print "Loading expression data,",
input = open("ALL_exprs.tsv")
lines = input.readlines()
input.close()
#The genes are the rows, patients are columns.
#Deal with the header row...
line = lines[0]
#Strip any trailing newline
if line[-1] == "\n" : line = line[:-1]
#Break up the line using the tabs
parts = line.split("\t")
#Check first entry is blank
assert parts[0]==""
#Store the patient (column) names
col_names = parts[1:]
row_names = []
exprs = []

for line in lines[1:] :
    #Strip any trailing newline
    if line[-1] == "\n" : line = line[:-1]
    #Break up the line using the tabs
    parts = line.split("\t")
    #Check there are exactly n+1 fields...
    assert len(parts)==len(col_names)+1
    #Store the expression levels for this gene
    row_names.append(parts[0])
    exprs.append(map(float, parts[1:]))
    
assert len(exprs)==len(row_names)
print "Done"
print "%i patients, %i genes" % (len(col_names), len(row_names))

print "Converting expression data into Numeric array,",
import Numeric
exprs_as_array = Numeric.array(exprs, Numeric.Float)
print "Done"

def patient_colour(patient_id) :
    assert patient_id in mol_biol, \
           "Patient ID of '%s' not in list!" % patient_id
    if mol_biol[patient_id] == "ALL1/AF4" :
        return "#FF0000" # Red
    else :
        return "#0000FF" # Blue
patient_colours = map(patient_colour, col_names)

from rpy import *
print "Heatmap as PNG,",
r.png("heatmap_from_python.png", width=600, height=589)
r.heatmap(exprs_as_array,
          cexRow=0.5,
          labRow=row_names, labCol=col_names,
          ColSideColors = patient_colours,
          col = r.topo_colors(50))
r.dev_off()
print "Done"

print "Heatmap as PDF,",
r.pdf("heatmap_from_python.pdf")
r.heatmap(exprs_as_array,
          cexRow=0.3,
          labRow=row_names, labCol=col_names,
          ColSideColors = patient_colours,
          col = r.topo_colors(50))
r.dev_off()
print "Done"

#Note
#In R, the exprs object would include the row names
#and column names.  This isn't possible with an array
#in Python, so we must explicitly provide them.

