Commit 11b750b2 by Mustafa Tekpinar

Plotting part was moved from R to Python.

parent a12e8938
...@@ -123,13 +123,13 @@ if(simple){ ...@@ -123,13 +123,13 @@ if(simple){
write.table(normPredCombi,paste0(prot,"_normPred_evolCombi.txt")) write.table(normPredCombi,paste0(prot,"_normPred_evolCombi.txt"))
print("done") print("done")
if(simple){ # Since I am doing the plots with Python, I am commenting this part
print("generating the plots...") # In addition, I want to get rid of R dependency completely!
plotMatBlues(paste(prot,"_normPred_evolEpi",sep="")) # if(simple){
plotMatGreens(paste(prot,"_normPred_evolInd",sep="")) # plotMatBlues(paste(prot,"_normPred_evolEpi",sep=""))
plotMatOranges(paste(prot,"_normPred_evolCombi",sep="")) # plotMatGreens(paste(prot,"_normPred_evolInd",sep=""))
print("done") # plotMatOranges(paste(prot,"_normPred_evolCombi",sep=""))
} # }
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright (c) 2018: Elodie Laine # Copyright (c) 2018: Elodie Laine
# Copyright (c) 2022: Mustafa Tekpinar
# This code is part of the gemme package and governed by its license. # This code is part of the gemme package and governed by its license.
# Please see the LICENSE.txt file included as part of this package. # Please see the LICENSE.txt file included as part of this package.
...@@ -10,8 +11,148 @@ import argparse ...@@ -10,8 +11,148 @@ import argparse
import re import re
import subprocess import subprocess
import math import math
import numpy as np
import matplotlib.pylab as plt
from gemmeAnal import * from gemmeAnal import *
alphabeticalAminoAcidsList = ['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L',
'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y']
def parseGEMMEoutput(inputFile, verbose):
"""
Parse normalized (I don't know how?!) GEMME output files:
Independent, Epistatic and Combined
Return: Data File as a Numpy array, where each col contains
deleteriousness score for an amino acid.
verbose is a boolean value
"""
gemmeDataFile =open(inputFile, 'r')
allLines = gemmeDataFile.readlines()
gemmeDataFile.close()
headerLine = allLines[0]
if(verbose):
print(headerLine)
matrixData = []
aaColumn = []
for line in allLines[1:]:
tempList = []
data = line.split()
for item in data:
if item[0] == "\"":
aaColumn.append(item)
elif (item == 'NA'):
tempList.append(0.0000000)
else:
tempList.append(float(item))
matrixData.append(tempList)
mutationsData = np.array(matrixData)
# mutatedTransposed = mutationsData.T
# return mutatedTransposed
return mutationsData
def plotGEMMEmatrix(scanningMatrix, outFile, beg, end, \
colorMap = 'coolwarm', offSet=0, pixelType='square'):
"""
A function to plot deep mutational scanning matrices.
Parameters
----------
scanningMatrix: numpy array of arrays
Data matrix to plot
outFile: string
Name of the output png image
colorMap: matplotlib cmap
Any colormap existing in matplotlib.
Default is coolwarm.
offSet: int
It is used to match the residue IDs in your PDB
file with 0 based indices read from scanningMatrix matrix
pixelType: string
It can only have 'square' or 'rectangle' values.
It is a matter of taste but I added it as an option.
Default is 'square'
Returns
-------
Nothing
"""
#We subtract 1 from beg bc matrix indices starts from 0
if(end == None):
end = len(scanningMatrix[0])
# print("Beginning: "+str(beg))
# print("End : "+str(end))
# print(len(scanningMatrix[0]))
subMatrix = scanningMatrix[:, (beg-1):end]
#print(subMatrix)
##########################################################################
# Set plotting parameters
nres_shown = len(subMatrix[0])
fig_height=8
# figure proportions
fig_width = fig_height/2 # inches
fig_width *= nres_shown/20
fig, ax = plt.subplots(figsize=(fig_width, fig_height))
if (nres_shown >=200):
majorTics = 50
else:
majorTics = 25
major_nums_x = np.arange(majorTics, len(subMatrix[0]), majorTics, dtype=int)
major_nums_x = major_nums_x -1
major_nums_x = np.insert(major_nums_x, 0, 0)
# print(major_nums_x)
minor_nums_x = np.arange(10, len(subMatrix[0]), 10, dtype=int)
minor_nums_x = minor_nums_x - 1
minor_nums_x = np.insert(minor_nums_x, 0, 0)
# print(minor_nums_x)
major_labels_x = major_nums_x + 1 + offSet
major_nums_y = np.arange(0, 20, 1, dtype=int)
major_labels_y = alphabeticalAminoAcidsList
plt.xticks(major_nums_x, major_labels_x, size=28)
ax.set_xticks(minor_nums_x, minor=True)
plt.yticks(major_nums_y, major_labels_y, size=16)
ax.set_yticklabels(major_labels_y, ha='left')
ax.tick_params(axis='y', which='major', pad=30)
#############################################################################
if(pixelType=='square'):
#For plotting square pixels
plt.imshow(subMatrix, cmap=colorMap)
elif(pixelType=='rectangle'):
#For plotting rectangular pixels
plt.imshow(subMatrix, cmap=colorMap, aspect=3.0)
else:
print("\nERROR: Unknown pixelType specified!\n")
sys.exit(-1)
#plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.0)
plt.tight_layout()
plt.savefig(outFile)
#plt.show()
#plt.imsave('output.png', subMatrix)
plt.close()
def check_argument_groups(parser, arg_dict, group, argument): def check_argument_groups(parser, arg_dict, group, argument):
""" """
...@@ -144,6 +285,31 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N): ...@@ -144,6 +285,31 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N):
launchJET(prot,retMet,bFile,fFile,n,N,nl) launchJET(prot,retMet,bFile,fFile,n,N,nl)
print("done") print("done")
launchPred(prot,inAli,mutFile) launchPred(prot,inAli,mutFile)
#Do Python plotting here
#TODO: Eventually, I will do the map plotting with a completely independent
# module and call the module here!
#TODO: Mark the original (wildtype) residue locations with a dot or something
# special to show the original amino acid.
#TODO: You can even put letters on the top line like in EVmutation output.
simple = True
if(simple):
print("generating the plots...")
gemmeData = parseGEMMEoutput(prot+"_normPred_evolEpi.txt", verbose=False)
plotGEMMEmatrix(gemmeData, prot+"_normPred_evolEpi.png", 1, None,\
colorMap='Blues_r', offSet=0, pixelType='square')
gemmeData = parseGEMMEoutput(prot+"_normPred_evolInd.txt", verbose=False)
plotGEMMEmatrix(gemmeData, prot+"_normPred_evolInd.png", 1, None,\
colorMap='Greens_r', offSet=0, pixelType='square')
gemmeData = parseGEMMEoutput(prot+"_normPred_evolCombi.txt", verbose=False)
plotGEMMEmatrix(gemmeData, prot+"_normPred_evolCombi.png", 1, None,\
colorMap='Oranges_r', offSet=0, pixelType='square')
print("done")
cleanTheMess(prot,bFile,fFile) cleanTheMess(prot,bFile,fFile)
......
...@@ -457,66 +457,68 @@ normalizePredSelMult<-function(pred, trace, wt, listMut){ ...@@ -457,66 +457,68 @@ normalizePredSelMult<-function(pred, trace, wt, listMut){
return(-normPred) return(-normPred)
} }
# Moved deep mutational map production from R to Python
library("RColorBrewer") # Therefore, I commented the following part till the end.
# by MT
############################################## # library("RColorBrewer")
### functions for ploting the results
############################################## # ##############################################
plotMatOranges<-function(pred_name){ # ### functions for ploting the results
# ##############################################
# plotMatOranges<-function(pred_name){
pred = as.matrix(read.table(paste(pred_name, ".txt", sep=""))) # pred = as.matrix(read.table(paste(pred_name, ".txt", sep="")))
sel=seq(1,dim(pred)[[2]]) # sel=seq(1,dim(pred)[[2]])
Iwidth = round(sqrt(dim(pred)[[2]])) + 3 # Iwidth = round(sqrt(dim(pred)[[2]])) + 3
prop=99 # prop=99
pred[is.na(pred)]=0 # pred[is.na(pred)]=0
minVal = floor(min(pred)) # minVal = floor(min(pred))
maxVal = ceiling(max(pred)) # maxVal = ceiling(max(pred))
p = (100-prop)/100 # p = (100-prop)/100
cutVal = quantile(pred,c(0,p))[2] # cutVal = quantile(pred,c(0,p))[2]
jpeg(paste(pred_name, '.jpg', sep=""), width = Iwidth, height = 5, units = 'in', res = 300) # jpeg(paste(pred_name, '.jpg', sep=""), width = Iwidth, height = 5, units = 'in', res = 300)
print(c(-maxVal,-cutVal,-minVal)) # print(c(-maxVal,-cutVal,-minVal))
image(sel,1:20,-t(pred[20:1,sel]),breaks=c(seq(-maxVal,-cutVal,le=80),-minVal),col=c(colorRampPalette(brewer.pal(9,"Greys"))(80)[1:10],colorRampPalette(brewer.pal(9,"Oranges"))(80)[11:70],colorRampPalette(brewer.pal(9,"Greys"))(80)[61:70]),yaxt="n",xaxt="n",ylab="",xlab="",bty="n") # image(sel,1:20,-t(pred[20:1,sel]),breaks=c(seq(-maxVal,-cutVal,le=80),-minVal),col=c(colorRampPalette(brewer.pal(9,"Greys"))(80)[1:10],colorRampPalette(brewer.pal(9,"Oranges"))(80)[11:70],colorRampPalette(brewer.pal(9,"Greys"))(80)[61:70]),yaxt="n",xaxt="n",ylab="",xlab="",bty="n")
axis(1,c(sel[1]-1,sel[seq(0,length(sel),by=10)]),las=2) # axis(1,c(sel[1]-1,sel[seq(0,length(sel),by=10)]),las=2)
axis(2,1:20,toupper(rownames(pred)[20:1]),las=2,tick=FALSE) # axis(2,1:20,toupper(rownames(pred)[20:1]),las=2,tick=FALSE)
dev.off() # dev.off()
} # }
plotMatGreens<-function(pred_name){ # plotMatGreens<-function(pred_name){
pred = as.matrix(read.table(paste(pred_name, ".txt", sep=""))) # pred = as.matrix(read.table(paste(pred_name, ".txt", sep="")))
Iwidth = round(sqrt(dim(pred)[[2]])) + 3 # Iwidth = round(sqrt(dim(pred)[[2]])) + 3
sel=seq(1,dim(pred)[[2]]) # sel=seq(1,dim(pred)[[2]])
prop=99 # prop=99
pred[is.na(pred)]=0 # pred[is.na(pred)]=0
minVal = floor(min(pred)) # minVal = floor(min(pred))
maxVal = ceiling(max(pred)) # maxVal = ceiling(max(pred))
p = (100-prop)/100 # p = (100-prop)/100
cutVal = quantile(pred,c(0,p))[2] # cutVal = quantile(pred,c(0,p))[2]
jpeg(paste(pred_name, '.jpg', sep=""), width = Iwidth, height = 5, units = 'in', res = 300) # jpeg(paste(pred_name, '.jpg', sep=""), width = Iwidth, height = 5, units = 'in', res = 300)
print(c(-maxVal,-cutVal,-minVal)) # print(c(-maxVal,-cutVal,-minVal))
image(sel,1:20,-t(pred[20:1,sel]),breaks=c(seq(-maxVal,-cutVal,le=80),-minVal),col=c(colorRampPalette(brewer.pal(9,"Greys"))(80)[1:10],colorRampPalette(brewer.pal(9,"Greens"))(80)[11:70],colorRampPalette(brewer.pal(9,"Greys"))(80)[61:70]),yaxt="n",xaxt="n",ylab="",xlab="",bty="n") # image(sel,1:20,-t(pred[20:1,sel]),breaks=c(seq(-maxVal,-cutVal,le=80),-minVal),col=c(colorRampPalette(brewer.pal(9,"Greys"))(80)[1:10],colorRampPalette(brewer.pal(9,"Greens"))(80)[11:70],colorRampPalette(brewer.pal(9,"Greys"))(80)[61:70]),yaxt="n",xaxt="n",ylab="",xlab="",bty="n")
axis(1,c(sel[1]-1,sel[seq(0,length(sel),by=10)]),las=2) # axis(1,c(sel[1]-1,sel[seq(0,length(sel),by=10)]),las=2)
axis(2,1:20,toupper(rownames(pred)[20:1]),las=2,tick=FALSE) # axis(2,1:20,toupper(rownames(pred)[20:1]),las=2,tick=FALSE)
dev.off() # dev.off()
} # }
plotMatBlues<-function(pred_name){ # plotMatBlues<-function(pred_name){
pred = as.matrix(read.table(paste(pred_name, ".txt", sep=""))) # pred = as.matrix(read.table(paste(pred_name, ".txt", sep="")))
sel=seq(1,dim(pred)[[2]]) # sel=seq(1,dim(pred)[[2]])
Iwidth = round(sqrt(dim(pred)[[2]])) + 3 # Iwidth = round(sqrt(dim(pred)[[2]])) + 3
prop=99 #95 # prop=99 #95
pred[is.na(pred)]=0 # pred[is.na(pred)]=0
minVal = floor(min(pred)) # minVal = floor(min(pred))
maxVal = ceiling(max(pred)) # maxVal = ceiling(max(pred))
p = (100-prop)/100 # p = (100-prop)/100
cutVal = quantile(pred,seq(0,p,by=p))[2] # cutVal = quantile(pred,seq(0,p,by=p))[2]
print(c(-maxVal,-minVal,-cutVal)) # print(c(-maxVal,-minVal,-cutVal))
jpeg(paste(pred_name, '.jpg', sep=""), width = Iwidth, height = 5, units = 'in', res = 300) # jpeg(paste(pred_name, '.jpg', sep=""), width = Iwidth, height = 5, units = 'in', res = 300)
#image(sel,1:20,-t(pred[,sel]),breaks=c(seq(-maxVal,-cutVal,le=80),-minVal),col=c(colorRampPalette(brewer.pal(9,"Greys"))(80)[11:20],colorRampPalette(brewer.pal(9,"Blues"))(80)[11:70],colorRampPalette(brewer.pal(9,"Greys"))(80)[71:80]),yaxt="n",xaxt="n",ylab="",xlab="",bty="n") # #image(sel,1:20,-t(pred[,sel]),breaks=c(seq(-maxVal,-cutVal,le=80),-minVal),col=c(colorRampPalette(brewer.pal(9,"Greys"))(80)[11:20],colorRampPalette(brewer.pal(9,"Blues"))(80)[11:70],colorRampPalette(brewer.pal(9,"Greys"))(80)[71:80]),yaxt="n",xaxt="n",ylab="",xlab="",bty="n")
image(sel,1:20,-t(pred[20:1,sel]),breaks=c(seq(-maxVal,-cutVal,le=80),-minVal),col=c(colorRampPalette(brewer.pal(9,"Greys"))(80)[1:10],colorRampPalette(brewer.pal(9,"Blues"))(80)[11:70],colorRampPalette(brewer.pal(9,"Greys"))(80)[61:70]),yaxt="n",xaxt="n",ylab="",xlab="",bty="n") # image(sel,1:20,-t(pred[20:1,sel]),breaks=c(seq(-maxVal,-cutVal,le=80),-minVal),col=c(colorRampPalette(brewer.pal(9,"Greys"))(80)[1:10],colorRampPalette(brewer.pal(9,"Blues"))(80)[11:70],colorRampPalette(brewer.pal(9,"Greys"))(80)[61:70]),yaxt="n",xaxt="n",ylab="",xlab="",bty="n")
axis(1,c(sel[1]-1,sel[seq(0,length(sel),by=10)]),las=2) # axis(1,c(sel[1]-1,sel[seq(0,length(sel),by=10)]),las=2)
axis(2,1:20,toupper(rownames(pred)[20:1]),las=2,tick=FALSE) # axis(2,1:20,toupper(rownames(pred)[20:1]),las=2,tick=FALSE)
dev.off() # dev.off()
} # }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment