Commit 11b750b2 by Mustafa Tekpinar

Plotting part was moved from R to Python.

parent a12e8938
......@@ -123,13 +123,13 @@ if(simple){
write.table(normPredCombi,paste0(prot,"_normPred_evolCombi.txt"))
print("done")
if(simple){
print("generating the plots...")
plotMatBlues(paste(prot,"_normPred_evolEpi",sep=""))
plotMatGreens(paste(prot,"_normPred_evolInd",sep=""))
plotMatOranges(paste(prot,"_normPred_evolCombi",sep=""))
print("done")
}
# Since I am doing the plots with Python, I am commenting this part
# In addition, I want to get rid of R dependency completely!
# if(simple){
# plotMatBlues(paste(prot,"_normPred_evolEpi",sep=""))
# plotMatGreens(paste(prot,"_normPred_evolInd",sep=""))
# plotMatOranges(paste(prot,"_normPred_evolCombi",sep=""))
# }
# -*- coding: utf-8 -*-
# Copyright (c) 2018: Elodie Laine
# Copyright (c) 2022: Mustafa Tekpinar
# This code is part of the gemme package and governed by its license.
# Please see the LICENSE.txt file included as part of this package.
......@@ -10,8 +11,148 @@ import argparse
import re
import subprocess
import math
import numpy as np
import matplotlib.pylab as plt
from gemmeAnal import *
alphabeticalAminoAcidsList = ['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L',
'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y']
def parseGEMMEoutput(inputFile, verbose):
"""
Parse normalized (I don't know how?!) GEMME output files:
Independent, Epistatic and Combined
Return: Data File as a Numpy array, where each col contains
deleteriousness score for an amino acid.
verbose is a boolean value
"""
gemmeDataFile =open(inputFile, 'r')
allLines = gemmeDataFile.readlines()
gemmeDataFile.close()
headerLine = allLines[0]
if(verbose):
print(headerLine)
matrixData = []
aaColumn = []
for line in allLines[1:]:
tempList = []
data = line.split()
for item in data:
if item[0] == "\"":
aaColumn.append(item)
elif (item == 'NA'):
tempList.append(0.0000000)
else:
tempList.append(float(item))
matrixData.append(tempList)
mutationsData = np.array(matrixData)
# mutatedTransposed = mutationsData.T
# return mutatedTransposed
return mutationsData
def plotGEMMEmatrix(scanningMatrix, outFile, beg, end, \
colorMap = 'coolwarm', offSet=0, pixelType='square'):
"""
A function to plot deep mutational scanning matrices.
Parameters
----------
scanningMatrix: numpy array of arrays
Data matrix to plot
outFile: string
Name of the output png image
colorMap: matplotlib cmap
Any colormap existing in matplotlib.
Default is coolwarm.
offSet: int
It is used to match the residue IDs in your PDB
file with 0 based indices read from scanningMatrix matrix
pixelType: string
It can only have 'square' or 'rectangle' values.
It is a matter of taste but I added it as an option.
Default is 'square'
Returns
-------
Nothing
"""
#We subtract 1 from beg bc matrix indices starts from 0
if(end == None):
end = len(scanningMatrix[0])
# print("Beginning: "+str(beg))
# print("End : "+str(end))
# print(len(scanningMatrix[0]))
subMatrix = scanningMatrix[:, (beg-1):end]
#print(subMatrix)
##########################################################################
# Set plotting parameters
nres_shown = len(subMatrix[0])
fig_height=8
# figure proportions
fig_width = fig_height/2 # inches
fig_width *= nres_shown/20
fig, ax = plt.subplots(figsize=(fig_width, fig_height))
if (nres_shown >=200):
majorTics = 50
else:
majorTics = 25
major_nums_x = np.arange(majorTics, len(subMatrix[0]), majorTics, dtype=int)
major_nums_x = major_nums_x -1
major_nums_x = np.insert(major_nums_x, 0, 0)
# print(major_nums_x)
minor_nums_x = np.arange(10, len(subMatrix[0]), 10, dtype=int)
minor_nums_x = minor_nums_x - 1
minor_nums_x = np.insert(minor_nums_x, 0, 0)
# print(minor_nums_x)
major_labels_x = major_nums_x + 1 + offSet
major_nums_y = np.arange(0, 20, 1, dtype=int)
major_labels_y = alphabeticalAminoAcidsList
plt.xticks(major_nums_x, major_labels_x, size=28)
ax.set_xticks(minor_nums_x, minor=True)
plt.yticks(major_nums_y, major_labels_y, size=16)
ax.set_yticklabels(major_labels_y, ha='left')
ax.tick_params(axis='y', which='major', pad=30)
#############################################################################
if(pixelType=='square'):
#For plotting square pixels
plt.imshow(subMatrix, cmap=colorMap)
elif(pixelType=='rectangle'):
#For plotting rectangular pixels
plt.imshow(subMatrix, cmap=colorMap, aspect=3.0)
else:
print("\nERROR: Unknown pixelType specified!\n")
sys.exit(-1)
#plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.0)
plt.tight_layout()
plt.savefig(outFile)
#plt.show()
#plt.imsave('output.png', subMatrix)
plt.close()
def check_argument_groups(parser, arg_dict, group, argument):
"""
......@@ -144,6 +285,31 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N):
launchJET(prot,retMet,bFile,fFile,n,N,nl)
print("done")
launchPred(prot,inAli,mutFile)
#Do Python plotting here
#TODO: Eventually, I will do the map plotting with a completely independent
# module and call the module here!
#TODO: Mark the original (wildtype) residue locations with a dot or something
# special to show the original amino acid.
#TODO: You can even put letters on the top line like in EVmutation output.
simple = True
if(simple):
print("generating the plots...")
gemmeData = parseGEMMEoutput(prot+"_normPred_evolEpi.txt", verbose=False)
plotGEMMEmatrix(gemmeData, prot+"_normPred_evolEpi.png", 1, None,\
colorMap='Blues_r', offSet=0, pixelType='square')
gemmeData = parseGEMMEoutput(prot+"_normPred_evolInd.txt", verbose=False)
plotGEMMEmatrix(gemmeData, prot+"_normPred_evolInd.png", 1, None,\
colorMap='Greens_r', offSet=0, pixelType='square')
gemmeData = parseGEMMEoutput(prot+"_normPred_evolCombi.txt", verbose=False)
plotGEMMEmatrix(gemmeData, prot+"_normPred_evolCombi.png", 1, None,\
colorMap='Oranges_r', offSet=0, pixelType='square')
print("done")
cleanTheMess(prot,bFile,fFile)
......
......@@ -457,66 +457,68 @@ normalizePredSelMult<-function(pred, trace, wt, listMut){
return(-normPred)
}
library("RColorBrewer")
##############################################
### functions for ploting the results
##############################################
plotMatOranges<-function(pred_name){
pred = as.matrix(read.table(paste(pred_name, ".txt", sep="")))
sel=seq(1,dim(pred)[[2]])
Iwidth = round(sqrt(dim(pred)[[2]])) + 3
prop=99
pred[is.na(pred)]=0
minVal = floor(min(pred))
maxVal = ceiling(max(pred))
p = (100-prop)/100
cutVal = quantile(pred,c(0,p))[2]
jpeg(paste(pred_name, '.jpg', sep=""), width = Iwidth, height = 5, units = 'in', res = 300)
print(c(-maxVal,-cutVal,-minVal))
image(sel,1:20,-t(pred[20:1,sel]),breaks=c(seq(-maxVal,-cutVal,le=80),-minVal),col=c(colorRampPalette(brewer.pal(9,"Greys"))(80)[1:10],colorRampPalette(brewer.pal(9,"Oranges"))(80)[11:70],colorRampPalette(brewer.pal(9,"Greys"))(80)[61:70]),yaxt="n",xaxt="n",ylab="",xlab="",bty="n")
axis(1,c(sel[1]-1,sel[seq(0,length(sel),by=10)]),las=2)
axis(2,1:20,toupper(rownames(pred)[20:1]),las=2,tick=FALSE)
dev.off()
}
plotMatGreens<-function(pred_name){
pred = as.matrix(read.table(paste(pred_name, ".txt", sep="")))
Iwidth = round(sqrt(dim(pred)[[2]])) + 3
sel=seq(1,dim(pred)[[2]])
prop=99
pred[is.na(pred)]=0
minVal = floor(min(pred))
maxVal = ceiling(max(pred))
p = (100-prop)/100
cutVal = quantile(pred,c(0,p))[2]
jpeg(paste(pred_name, '.jpg', sep=""), width = Iwidth, height = 5, units = 'in', res = 300)
print(c(-maxVal,-cutVal,-minVal))
image(sel,1:20,-t(pred[20:1,sel]),breaks=c(seq(-maxVal,-cutVal,le=80),-minVal),col=c(colorRampPalette(brewer.pal(9,"Greys"))(80)[1:10],colorRampPalette(brewer.pal(9,"Greens"))(80)[11:70],colorRampPalette(brewer.pal(9,"Greys"))(80)[61:70]),yaxt="n",xaxt="n",ylab="",xlab="",bty="n")
axis(1,c(sel[1]-1,sel[seq(0,length(sel),by=10)]),las=2)
axis(2,1:20,toupper(rownames(pred)[20:1]),las=2,tick=FALSE)
dev.off()
}
plotMatBlues<-function(pred_name){
pred = as.matrix(read.table(paste(pred_name, ".txt", sep="")))
sel=seq(1,dim(pred)[[2]])
Iwidth = round(sqrt(dim(pred)[[2]])) + 3
prop=99 #95
pred[is.na(pred)]=0
minVal = floor(min(pred))
maxVal = ceiling(max(pred))
p = (100-prop)/100
cutVal = quantile(pred,seq(0,p,by=p))[2]
print(c(-maxVal,-minVal,-cutVal))
jpeg(paste(pred_name, '.jpg', sep=""), width = Iwidth, height = 5, units = 'in', res = 300)
#image(sel,1:20,-t(pred[,sel]),breaks=c(seq(-maxVal,-cutVal,le=80),-minVal),col=c(colorRampPalette(brewer.pal(9,"Greys"))(80)[11:20],colorRampPalette(brewer.pal(9,"Blues"))(80)[11:70],colorRampPalette(brewer.pal(9,"Greys"))(80)[71:80]),yaxt="n",xaxt="n",ylab="",xlab="",bty="n")
image(sel,1:20,-t(pred[20:1,sel]),breaks=c(seq(-maxVal,-cutVal,le=80),-minVal),col=c(colorRampPalette(brewer.pal(9,"Greys"))(80)[1:10],colorRampPalette(brewer.pal(9,"Blues"))(80)[11:70],colorRampPalette(brewer.pal(9,"Greys"))(80)[61:70]),yaxt="n",xaxt="n",ylab="",xlab="",bty="n")
axis(1,c(sel[1]-1,sel[seq(0,length(sel),by=10)]),las=2)
axis(2,1:20,toupper(rownames(pred)[20:1]),las=2,tick=FALSE)
dev.off()
}
# Moved deep mutational map production from R to Python
# Therefore, I commented the following part till the end.
# by MT
# library("RColorBrewer")
# ##############################################
# ### functions for ploting the results
# ##############################################
# plotMatOranges<-function(pred_name){
# pred = as.matrix(read.table(paste(pred_name, ".txt", sep="")))
# sel=seq(1,dim(pred)[[2]])
# Iwidth = round(sqrt(dim(pred)[[2]])) + 3
# prop=99
# pred[is.na(pred)]=0
# minVal = floor(min(pred))
# maxVal = ceiling(max(pred))
# p = (100-prop)/100
# cutVal = quantile(pred,c(0,p))[2]
# jpeg(paste(pred_name, '.jpg', sep=""), width = Iwidth, height = 5, units = 'in', res = 300)
# print(c(-maxVal,-cutVal,-minVal))
# image(sel,1:20,-t(pred[20:1,sel]),breaks=c(seq(-maxVal,-cutVal,le=80),-minVal),col=c(colorRampPalette(brewer.pal(9,"Greys"))(80)[1:10],colorRampPalette(brewer.pal(9,"Oranges"))(80)[11:70],colorRampPalette(brewer.pal(9,"Greys"))(80)[61:70]),yaxt="n",xaxt="n",ylab="",xlab="",bty="n")
# axis(1,c(sel[1]-1,sel[seq(0,length(sel),by=10)]),las=2)
# axis(2,1:20,toupper(rownames(pred)[20:1]),las=2,tick=FALSE)
# dev.off()
# }
# plotMatGreens<-function(pred_name){
# pred = as.matrix(read.table(paste(pred_name, ".txt", sep="")))
# Iwidth = round(sqrt(dim(pred)[[2]])) + 3
# sel=seq(1,dim(pred)[[2]])
# prop=99
# pred[is.na(pred)]=0
# minVal = floor(min(pred))
# maxVal = ceiling(max(pred))
# p = (100-prop)/100
# cutVal = quantile(pred,c(0,p))[2]
# jpeg(paste(pred_name, '.jpg', sep=""), width = Iwidth, height = 5, units = 'in', res = 300)
# print(c(-maxVal,-cutVal,-minVal))
# image(sel,1:20,-t(pred[20:1,sel]),breaks=c(seq(-maxVal,-cutVal,le=80),-minVal),col=c(colorRampPalette(brewer.pal(9,"Greys"))(80)[1:10],colorRampPalette(brewer.pal(9,"Greens"))(80)[11:70],colorRampPalette(brewer.pal(9,"Greys"))(80)[61:70]),yaxt="n",xaxt="n",ylab="",xlab="",bty="n")
# axis(1,c(sel[1]-1,sel[seq(0,length(sel),by=10)]),las=2)
# axis(2,1:20,toupper(rownames(pred)[20:1]),las=2,tick=FALSE)
# dev.off()
# }
# plotMatBlues<-function(pred_name){
# pred = as.matrix(read.table(paste(pred_name, ".txt", sep="")))
# sel=seq(1,dim(pred)[[2]])
# Iwidth = round(sqrt(dim(pred)[[2]])) + 3
# prop=99 #95
# pred[is.na(pred)]=0
# minVal = floor(min(pred))
# maxVal = ceiling(max(pred))
# p = (100-prop)/100
# cutVal = quantile(pred,seq(0,p,by=p))[2]
# print(c(-maxVal,-minVal,-cutVal))
# jpeg(paste(pred_name, '.jpg', sep=""), width = Iwidth, height = 5, units = 'in', res = 300)
# #image(sel,1:20,-t(pred[,sel]),breaks=c(seq(-maxVal,-cutVal,le=80),-minVal),col=c(colorRampPalette(brewer.pal(9,"Greys"))(80)[11:20],colorRampPalette(brewer.pal(9,"Blues"))(80)[11:70],colorRampPalette(brewer.pal(9,"Greys"))(80)[71:80]),yaxt="n",xaxt="n",ylab="",xlab="",bty="n")
# image(sel,1:20,-t(pred[20:1,sel]),breaks=c(seq(-maxVal,-cutVal,le=80),-minVal),col=c(colorRampPalette(brewer.pal(9,"Greys"))(80)[1:10],colorRampPalette(brewer.pal(9,"Blues"))(80)[11:70],colorRampPalette(brewer.pal(9,"Greys"))(80)[61:70]),yaxt="n",xaxt="n",ylab="",xlab="",bty="n")
# axis(1,c(sel[1]-1,sel[seq(0,length(sel),by=10)]),las=2)
# axis(2,1:20,toupper(rownames(pred)[20:1]),las=2,tick=FALSE)
# dev.off()
# }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment