Commit 39265aff by DLA

Updates

parent 24dbbbb7
# -*- coding: utf-8 -*-
"""
Created on Fri Dec 10 02:56:41 2021
@author: awadmin
"""
from sklearn.manifold import TSNE
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# The twenty standard amino-acid three-letter codes, alphabetically ordered.
list_aa = sorted(['CYS', 'ASP', 'SER', 'GLN', 'LYS', 'ILE', 'PRO',
                  'THR', 'PHE', 'ASN', 'GLY', 'HIS', 'LEU', 'ARG',
                  'TRP', 'ALA', 'VAL', 'GLU', 'TYR', 'MET'])
# Subset used by the (currently commented-out) residue filter below.
list_aa_filter = ['ARG', 'ASP', 'GLU', 'GLN', 'ASN', 'PRO']
# Structural-region hue: support / core / rim / surface / interior.
hue_order_scr = ['SUP', 'COR', 'RIM', 'SUR', 'INT']
hue_color_scr = {'SUP': 'red', 'COR': '#d4aa00ff', 'RIM': 'blue', 'SUR': 'purple', 'INT': 'gray'}
# Binding-partner hue: receptor / ligand.
hue_order_rl = ['R', 'L']
hue_color_rl = {'R': 'cyan', 'L': 'orange'}
# One distinct 'Paired' palette colour per amino acid, in sorted order.
cp = sns.color_palette('Paired', n_colors=20)
hue_color_aa = dict(zip(list_aa, cp))
hue_order_aa = list_aa
filename_source = 'intermediate_xray_skempi_wt_nomask_200'
#filename_source = 'intermediate_xray_skempi_wt_mask_sidechain_200'
#filename_source = 'intermediate_xray_skempi_wt_mask_sphere5_randomcenter_200'
#filename_source = 'intermediate_backrub_nomask'
#filename_source = 'intermediate_xray_skempi_wt_nomask_200_4channels'
#filename_source = 'intermediate_xray_skempi_wt_mask_sidechain_200_4channels'
#filename_source = 'intermediate_xray_skempi_wt_mask_sphere5_randomcenter_200_4channels'

# For each projection (t-SNE and PCA), draw one joint scatter plot per hue:
# amino-acid identity, interface region (SCR), and binding partner (R/L).
for algo in ['_tSNE.csv', '_PCA.csv']:
    filename = filename_source + algo
    df = pd.read_csv(filename, sep='\t')
    # Expand single-letter region codes to the plotting labels.
    df['resregion'] = df.resregion.replace({'S': 'SUP', 'C': 'COR', 'R': 'RIM'})
    #df = df.loc[df.resname.isin(list_aa_filter)]
    #df = df.loc[df.complex.str.contains('1BRS')]
    plt.rcParams.update({'font.size': 24})
    for hue, order, palette, tag in [
            ('resname', hue_order_aa, hue_color_aa, '_aa_scatterplot.png'),
            ('resregion', hue_order_scr, hue_color_scr, '_scr_scatterplot.png'),
            ('partner', hue_order_rl, hue_color_rl, '_rl_scatterplot.png')]:
        sns.jointplot(data=df, x='PC1', y='PC2', hue=hue, kind='scatter',
                      height=20, hue_order=order, palette=palette)
        plt.tight_layout()
        plt.savefig(filename.replace('.csv', tag))
        # jointplot creates a fresh Figure each call; the originals were never
        # released, accumulating six 20-inch figures in memory per run.
        plt.close('all')
# -*- coding: utf-8 -*-
"""
Created on Fri Dec 10 02:56:41 2021
@author: awadmin
"""
from sklearn.manifold import TSNE
import pandas as pd
import numpy as np
#filename = 'intermediate_xray_skempi_wt_nomask_200.csv'
#filename = 'intermediate_mutation_masked-all-aa.csv'
filename = 'intermediate_xray_skempi_wt_nomask_200.csv'
filename_out = filename.replace('.csv', '_tSNE.csv')
l_embed = 200  # dimensionality of the stored embedding vectors

df = pd.read_csv(filename, sep='\t')
# 'embeddings' holds one comma-separated string per row; expand it into
# l_embed columns and cast to float explicitly -- str.split yields string
# columns, which sklearn would otherwise have to coerce implicitly.
embed_cols = list(range(l_embed))
df[embed_cols] = df['embeddings'].str.split(',', expand=True).astype(float)

X_embedded = TSNE(n_components=2, random_state=42, perplexity=500,
                  early_exaggeration=1).fit_transform(df[embed_cols])
df['PC1'] = X_embedded[:, 0]
df['PC2'] = X_embedded[:, 1]
df = df.drop(['embeddings'], axis=1)
df.to_csv(filename_out, sep='\t')
\ No newline at end of file
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Dec 13 21:09:56 2020
@author: yasser
"""
import logging
import os
import sys
import gc
from os import path, mkdir, getenv, listdir, remove, system, stat
import pandas as pd
import numpy as np
#from prody import *
import glob
import shutil
#import matplotlib.pyplot as plt
import seaborn as sns
from math import exp
import subprocess
from subprocess import CalledProcessError, check_call
import traceback
from random import shuffle, random, seed, sample
from numpy import newaxis
import matplotlib.pyplot as plt
import time
import collections
#import scr
from numpy import asarray
from sklearn.preprocessing import OneHotEncoder
import tensorflow.keras
from tensorflow.keras import backend as K
from tensorflow.keras import regularizers
from tensorflow.keras.datasets import mnist # subroutines for fetching the MNIST dataset
from tensorflow.keras.models import Model, Sequential,load_model # basic class for specifying and training a neural network
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Dense, Dropout, Activation, Flatten, AveragePooling3D
#from tensorflow.keras.utils import np_utils # utilities for one-hot encoding of ground truth values
from tensorflow.keras.layers import Dot
from tensorflow.keras.backend import ones, ones_like
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, accuracy_score, roc_auc_score, roc_curve, precision_recall_curve
from sklearn.preprocessing import MinMaxScaler
import pickle
print('Your python version: {}'.format(sys.version_info.major))

USE_TENSORFLOW_AS_BACKEND = True
# IF YOU *DO* HAVE AN Nvidia GPU on your computer, or execute on Google COLAB, then change below to False!
FORCE_CPU = False #False

# Select the Keras backend before any Keras work happens.
if USE_TENSORFLOW_AS_BACKEND:
    os.environ['KERAS_BACKEND'] = 'tensorflow'
else:
    os.environ['KERAS_BACKEND'] = 'theano'
if FORCE_CPU:
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" # see issue #152
    os.environ["CUDA_VISIBLE_DEVICES"] = ""

if USE_TENSORFLOW_AS_BACKEND == True:
    import tensorflow as tf
    print('Your tensorflow version: {}'.format(tf.__version__))
    print("GPU : "+tf.test.gpu_device_name())
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    # list_physical_devices returns [] on CPU-only hosts; indexing [0]
    # unconditionally raised IndexError before any work was done.
    if physical_devices:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)
else:
    import theano
    print('Your theano version: {}'.format(theano.__version__))

logging.basicConfig(filename='manager.log', filemode='w', format='%(levelname)s: %(message)s', level=logging.DEBUG)
mainlog = logging.getLogger('main')
# Seed the stdlib RNG from an unseeded numpy draw (0-10), so sample order is
# deliberately NOT reproducible between runs -- TODO confirm this is intended.
seed(int(np.round(np.random.random()*10)))
#################################################################################################
def save_obj(obj, name):
    """Pickle *obj* to '<name>.pkl' using the highest available protocol."""
    with open('{}.pkl'.format(name), 'wb') as handle:
        pickle.dump(obj, handle, pickle.HIGHEST_PROTOCOL)
def load_obj(name):
    """Unpickle and return the object stored at '<name>.pkl'."""
    with open('{}.pkl'.format(name), 'rb') as handle:
        return pickle.load(handle)
# Three-letter -> one-letter amino-acid code table.
one_letter = {'VAL': 'V', 'ILE': 'I', 'LEU': 'L', 'GLU': 'E', 'GLN': 'Q',
              'ASP': 'D', 'ASN': 'N', 'HIS': 'H', 'TRP': 'W', 'PHE': 'F',
              'TYR': 'Y', 'ARG': 'R', 'LYS': 'K', 'SER': 'S', 'THR': 'T',
              'MET': 'M', 'ALA': 'A', 'GLY': 'G', 'PRO': 'P', 'CYS': 'C'}
# One-letter -> three-letter: exact inverse of the table above.
three_letter = {code1: code3 for code3, code1 in one_letter.items()}
# Edge length (voxels) of the cubic local-environment maps -- TODO confirm.
v_dim = 24
#map_dir = 'validations'
#map_dir = 'map_dir_backrub_dynamics'
#map_dir = 'skempi_mutations_mask/map_dir_skempi_wt'
#map_dir = 'skempi_mutations_mask/map_dir_mut_sep'
# Input directory of per-complex .pkl.lz4 sample maps.
map_dir = '/home/yasser/myThesis/ProtPartDisc/python_scripts/map_dir_mut_sep_nomask'
# Per-sample predictions (tab-separated).
output_file = 'output_backrub_nomask.csv'
# Intermediate-layer embeddings (tab-separated).
intermediate_file = 'intermediate_backrub_nomask_200.csv'
# Pre-trained classifier; its 'layer1' output is tapped below for embeddings.
model = load_model(path.join('models5_nonorm_classweight_Porlineweight08', '0_30_model'))
def load_map(sample_test):
    """Decompress one .pkl.lz4 sample and return its stored contents.

    Returns (X, y, y_ddg, region, comp_type, expr_method). The feature
    tensor X is truncated to its first 167 channels (drops the SCR and RL
    feature planes). The decompressed .pkl is always deleted on the way out.
    """
    # lz4 -d -f: decompress next to the archive, overwriting any stale copy.
    check_call(['lz4', '-d', '-f', sample_test], stdout=sys.stdout)
    try:
        X, y, y_ddg, region, comp_type, expr_method = load_obj(sample_test.replace('.pkl.lz4', ''))
    finally:
        # Clean up the decompressed file even when unpickling fails; the
        # original version leaked it on error.
        remove(sample_test.replace('.lz4', ''))
    #Filter features (SCR and RL)
    X = X[:, :, :, :, :167]
    return X, y, y_ddg, region, comp_type, expr_method
samples = glob.glob(path.join(map_dir, '*', '1', '*'))

output_handler = open(output_file, 'w')
intermediate_handler = open(intermediate_file, 'w')
output_handler.write('\t'.join(['complex', 'resname', 'resregion', 'resnumber',
                                'partner', 'type', 'prediction', 'target', 'ddg']) + '\n')
intermediate_handler.write('\t'.join(['complex', 'resname', 'resregion', 'resnumber',
                                      'partner', 'type', 'embeddings', 'ddg']) + '\n')

# Build the embedding extractor once: it only depends on the fixed model, so
# re-creating it per sample (as before) wasted time and graph memory.
intermediate_model = Model(inputs=model.input, outputs=model.get_layer('layer1').output)

for sample_test in samples:
    try:
        print(sample_test)
        X, y, y_ddg, region, comp_type, expr_method = load_map(sample_test)
    except Exception as e:
        logging.info("Bad interface!" + '\nError message: ' + str(e) +
                     "\nMore information:\n" + traceback.format_exc())
        continue

    comp_name = path.basename(sample_test).replace('.pkl.lz4', '')
    # 'wt' entries encode the residue one-letter code first (e.g. 'A12B');
    # mutant entries encode it last -- TODO confirm against the generator.
    if comp_name.split('_')[0] == 'wt':
        res_name = three_letter[comp_name.split('--')[1][0]]
    else:
        res_name = three_letter[comp_name.split('--')[1][-1]]
    inter_info = ('NA', 'NA', comp_name.split('--')[1][2:-1], comp_name.split('--')[1][1])  # Must be fixed
    reg_type = region

    y_preds = model.predict([X], batch_size=X.shape[0])[0]
    intermediate_prediction = intermediate_model.predict([X], batch_size=X.shape[0])[0]
    _ = gc.collect()
    y = y[0]

    output_handler.write('\t'.join([comp_name, res_name, reg_type,
                                    str(inter_info[2]), inter_info[3], comp_type,
                                    ','.join(str(v) for v in y_preds),
                                    ','.join(str(v) for v in y),
                                    str(y_ddg)]) + '\n')
    intermediate_handler.write('\t'.join([comp_name, res_name, reg_type,
                                          str(inter_info[2]), inter_info[3], comp_type,
                                          ','.join(str(v) for v in intermediate_prediction),
                                          str(y_ddg)]) + '\n')

# Close so the last rows are flushed; the originals were never closed.
output_handler.close()
intermediate_handler.close()
\ No newline at end of file
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Dec 13 21:09:56 2020
@author: yasser
"""
import logging
import os
import sys
import gc
from os import path, mkdir, getenv, listdir, remove, system, stat
import pandas as pd
import numpy as np
#from prody import *
import glob
import shutil
#import matplotlib.pyplot as plt
import seaborn as sns
from math import exp
import subprocess
from subprocess import CalledProcessError, check_call
import traceback
from random import shuffle, random, seed, sample
from numpy import newaxis
import matplotlib.pyplot as plt
import time
import collections
#import scr
from numpy import asarray
from sklearn.preprocessing import OneHotEncoder
import tensorflow.keras
from tensorflow.keras import backend as K
from tensorflow.keras import regularizers
from tensorflow.keras.datasets import mnist # subroutines for fetching the MNIST dataset
from tensorflow.keras.models import Model, Sequential,load_model # basic class for specifying and training a neural network
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Dense, Dropout, Activation, Flatten, AveragePooling3D
#from tensorflow.keras.utils import np_utils # utilities for one-hot encoding of ground truth values
from tensorflow.keras.layers import Dot
from tensorflow.keras.backend import ones, ones_like
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, accuracy_score, roc_auc_score, roc_curve, precision_recall_curve
from sklearn.preprocessing import MinMaxScaler
import pickle
print('Your python version: {}'.format(sys.version_info.major))

USE_TENSORFLOW_AS_BACKEND = True
# IF YOU *DO* HAVE AN Nvidia GPU on your computer, or execute on Google COLAB, then change below to False!
FORCE_CPU = False #False

# Select the Keras backend before any Keras work happens.
if USE_TENSORFLOW_AS_BACKEND:
    os.environ['KERAS_BACKEND'] = 'tensorflow'
else:
    os.environ['KERAS_BACKEND'] = 'theano'
if FORCE_CPU:
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" # see issue #152
    os.environ["CUDA_VISIBLE_DEVICES"] = ""

if USE_TENSORFLOW_AS_BACKEND == True:
    import tensorflow as tf
    print('Your tensorflow version: {}'.format(tf.__version__))
    print("GPU : "+tf.test.gpu_device_name())
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    # list_physical_devices returns [] on CPU-only hosts; indexing [0]
    # unconditionally raised IndexError before any work was done.
    if physical_devices:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)
else:
    import theano
    print('Your theano version: {}'.format(theano.__version__))

logging.basicConfig(filename='manager.log', filemode='w', format='%(levelname)s: %(message)s', level=logging.DEBUG)
mainlog = logging.getLogger('main')
# Seed the stdlib RNG from an unseeded numpy draw (0-10), so sample order is
# deliberately NOT reproducible between runs -- TODO confirm this is intended.
seed(int(np.round(np.random.random()*10)))
#################################################################################################
def save_obj(obj, name):
    """Pickle *obj* to '<name>.pkl' using the highest available protocol."""
    with open('{}.pkl'.format(name), 'wb') as handle:
        pickle.dump(obj, handle, pickle.HIGHEST_PROTOCOL)
def load_obj(name):
    """Unpickle and return the object stored at '<name>.pkl'."""
    with open('{}.pkl'.format(name), 'rb') as handle:
        return pickle.load(handle)
# Three-letter -> one-letter amino-acid code table.
one_letter = {'VAL': 'V', 'ILE': 'I', 'LEU': 'L', 'GLU': 'E', 'GLN': 'Q',
              'ASP': 'D', 'ASN': 'N', 'HIS': 'H', 'TRP': 'W', 'PHE': 'F',
              'TYR': 'Y', 'ARG': 'R', 'LYS': 'K', 'SER': 'S', 'THR': 'T',
              'MET': 'M', 'ALA': 'A', 'GLY': 'G', 'PRO': 'P', 'CYS': 'C'}
# One-letter -> three-letter: exact inverse of the table above.
three_letter = {code1: code3 for code3, code1 in one_letter.items()}
# Edge length (voxels) of the cubic local-environment maps -- TODO confirm.
v_dim = 24
#map_dir = 'validations'
#map_dir = 'map_dir_backrub_dynamics'
#map_dir = 'skempi_mutations_mask/map_dir_skempi_wt'
#map_dir = 'skempi_mutations_mask/map_dir_mut_sep'
# Input directory of per-complex .pkl.lz4 sample maps (4-channel variant).
map_dir = '/home/yasser/myThesis/ProtPartDisc/python_scripts/map_dir_mut_sep_nomask_4channels'
# Per-sample predictions (tab-separated).
output_file = 'output_backrub_nomask_4channels.csv'
# Intermediate-layer embeddings (tab-separated).
intermediate_file = 'intermediate_backrub_nomask_4channels.csv'
# Pre-trained 4-channel classifier; its 'layer2' output is tapped below.
model = load_model(path.join('models5_nonorm_classweight_Porlineweight08_Others_4channels', '0_733_model'))
def load_map(sample_test):
    """Decompress one .pkl.lz4 sample and return its stored contents.

    Returns (X, y, y_ddg, region, comp_type, expr_method). The feature
    tensor X is truncated to its first 4 channels. The decompressed .pkl
    is always deleted on the way out.
    """
    # lz4 -d -f: decompress next to the archive, overwriting any stale copy.
    check_call(['lz4', '-d', '-f', sample_test], stdout=sys.stdout)
    try:
        X, y, y_ddg, region, comp_type, expr_method = load_obj(sample_test.replace('.pkl.lz4', ''))
    finally:
        # Clean up the decompressed file even when unpickling fails; the
        # original version leaked it on error.
        remove(sample_test.replace('.lz4', ''))
    #Filter features (SCR and RL)
    X = X[:, :, :, :, :4]
    return X, y, y_ddg, region, comp_type, expr_method
samples = glob.glob(path.join(map_dir, '*', '1', '*'))

output_handler = open(output_file, 'w')
intermediate_handler = open(intermediate_file, 'w')
output_handler.write('\t'.join(['complex', 'resname', 'resregion', 'resnumber',
                                'partner', 'type', 'prediction', 'target', 'ddg']) + '\n')
intermediate_handler.write('\t'.join(['complex', 'resname', 'resregion', 'resnumber',
                                      'partner', 'type', 'embeddings', 'ddg']) + '\n')

# Build the embedding extractor once: it only depends on the fixed model, so
# re-creating it per sample (as before) wasted time and graph memory.
intermediate_model = Model(inputs=model.input, outputs=model.get_layer('layer2').output)

for sample_test in samples:
    try:
        print(sample_test)
        X, y, y_ddg, region, comp_type, expr_method = load_map(sample_test)
    except Exception as e:
        logging.info("Bad interface!" + '\nError message: ' + str(e) +
                     "\nMore information:\n" + traceback.format_exc())
        continue

    comp_name = path.basename(sample_test).replace('.pkl.lz4', '')
    # 'wt' entries encode the residue one-letter code first (e.g. 'A12B');
    # mutant entries encode it last -- TODO confirm against the generator.
    if comp_name.split('_')[0] == 'wt':
        res_name = three_letter[comp_name.split('--')[1][0]]
    else:
        res_name = three_letter[comp_name.split('--')[1][-1]]
    inter_info = ('NA', 'NA', comp_name.split('--')[1][2:-1], comp_name.split('--')[1][1])  # Must be fixed
    reg_type = region

    y_preds = model.predict([X], batch_size=X.shape[0])[0]
    intermediate_prediction = intermediate_model.predict([X], batch_size=X.shape[0])[0]
    _ = gc.collect()
    y = y[0]

    output_handler.write('\t'.join([comp_name, res_name, reg_type,
                                    str(inter_info[2]), inter_info[3], comp_type,
                                    ','.join(str(v) for v in y_preds),
                                    ','.join(str(v) for v in y),
                                    str(y_ddg)]) + '\n')
    intermediate_handler.write('\t'.join([comp_name, res_name, reg_type,
                                          str(inter_info[2]), inter_info[3], comp_type,
                                          ','.join(str(v) for v in intermediate_prediction),
                                          str(y_ddg)]) + '\n')

# Close so the last rows are flushed; the originals were never closed.
output_handler.close()
intermediate_handler.close()
\ No newline at end of file
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Dec 13 21:09:56 2020
@author: yasser
"""
import logging
import os
import sys
import gc
from os import path, mkdir, getenv, listdir, remove, system, stat
import pandas as pd
import numpy as np
#from prody import *
import glob
import shutil
#import matplotlib.pyplot as plt
import seaborn as sns
from math import exp
import subprocess
from subprocess import CalledProcessError, check_call
import traceback
from random import shuffle, random, seed, sample
from numpy import newaxis
import matplotlib.pyplot as plt
import time
import collections
#import scr
from numpy import asarray
from sklearn.preprocessing import OneHotEncoder
import tensorflow.keras
from tensorflow.keras import backend as K
from tensorflow.keras import regularizers
from tensorflow.keras.datasets import mnist # subroutines for fetching the MNIST dataset
from tensorflow.keras.models import Model, Sequential,load_model # basic class for specifying and training a neural network
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Dense, Dropout, Activation, Flatten, AveragePooling3D
#from tensorflow.keras.utils import np_utils # utilities for one-hot encoding of ground truth values
from tensorflow.keras.layers import Dot
from tensorflow.keras.backend import ones, ones_like
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, accuracy_score, roc_auc_score, roc_curve, precision_recall_curve, log_loss
from sklearn.preprocessing import MinMaxScaler
import pickle
import scipy
print('Your python version: {}'.format(sys.version_info.major))

USE_TENSORFLOW_AS_BACKEND = True
# IF YOU *DO* HAVE AN Nvidia GPU on your computer, or execute on Google COLAB, then change below to False!
FORCE_CPU = False #False

# Select the Keras backend before any Keras work happens.
if USE_TENSORFLOW_AS_BACKEND:
    os.environ['KERAS_BACKEND'] = 'tensorflow'
else:
    os.environ['KERAS_BACKEND'] = 'theano'
if FORCE_CPU:
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" # see issue #152
    os.environ["CUDA_VISIBLE_DEVICES"] = ""

if USE_TENSORFLOW_AS_BACKEND == True:
    import tensorflow as tf
    print('Your tensorflow version: {}'.format(tf.__version__))
    print("GPU : "+tf.test.gpu_device_name())
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    # list_physical_devices returns [] on CPU-only hosts; indexing [0]
    # unconditionally raised IndexError before any work was done.
    if physical_devices:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)
else:
    import theano
    print('Your theano version: {}'.format(theano.__version__))

logging.basicConfig(filename='manager.log', filemode='w', format='%(levelname)s: %(message)s', level=logging.DEBUG)
mainlog = logging.getLogger('main')
# Seed the stdlib RNG from an unseeded numpy draw (0-10), so sample order is
# deliberately NOT reproducible between runs -- TODO confirm this is intended.
seed(int(np.round(np.random.random()*10)))
#################################################################################################
def save_obj(obj, name):
    """Pickle *obj* to '<name>.pkl' using the highest available protocol."""
    with open('{}.pkl'.format(name), 'wb') as handle:
        pickle.dump(obj, handle, pickle.HIGHEST_PROTOCOL)
def load_obj(name):
    """Unpickle and return the object stored at '<name>.pkl'."""
    with open('{}.pkl'.format(name), 'rb') as handle:
        return pickle.load(handle)
# Three-letter -> one-letter amino-acid code table.
one_letter = {'VAL': 'V', 'ILE': 'I', 'LEU': 'L', 'GLU': 'E', 'GLN': 'Q',
              'ASP': 'D', 'ASN': 'N', 'HIS': 'H', 'TRP': 'W', 'PHE': 'F',
              'TYR': 'Y', 'ARG': 'R', 'LYS': 'K', 'SER': 'S', 'THR': 'T',
              'MET': 'M', 'ALA': 'A', 'GLY': 'G', 'PRO': 'P', 'CYS': 'C'}
# One-letter -> three-letter: exact inverse of the table above.
three_letter = {code1: code3 for code3, code1 in one_letter.items()}
# Edge length (voxels) of the cubic local-environment maps -- TODO confirm.
v_dim = 24
#map_dir = 'validations'
#map_dir = 'map_dir_skempi_wt'
# Input directory of per-complex .pkl.lz4 sample maps (sphere-masked variant).
map_dir = 'map_dir_skempi_wt_mask_sphere5_randomcenter'
# Per-residue predictions (tab-separated).
output_file = 'output_xray_skempi_wt_mask_sphere5_randomcenter.csv'
# Intermediate-layer embeddings (tab-separated).
intermediate_file = 'intermediate_xray_skempi_wt_mask_sphere5_randomcenter_200.csv'
# Pre-trained classifier; its 'layer1' output is tapped below for embeddings.
model = load_model(path.join('models5_nonorm_classweight_Porlineweight08', '0_30_model'))
def load_map(sample_test):
    """Decompress one .pkl.lz4 sample and return its stored contents.

    Returns (X, y, reg_type, res_pos, res_name, inter_info) as stored by the
    cube-generation step. The feature tensor X is truncated to its first 167
    channels (drops the SCR and RL feature planes). The decompressed .pkl is
    always deleted on the way out.
    """
    # lz4 -d -f: decompress next to the archive, overwriting any stale copy.
    check_call(['lz4', '-d', '-f', sample_test], stdout=sys.stdout)
    try:
        X, y, reg_type, res_pos, res_name, inter_info = load_obj(sample_test.replace('.pkl.lz4', ''))
    finally:
        # Clean up the decompressed file even when unpickling fails; the
        # original version leaked it on error.
        remove(sample_test.replace('.lz4', ''))
    #Filter features (SCR and RL)
    X = X[:, :, :, :, :167]
    return X, y, reg_type, res_pos, res_name, inter_info
# 'import scipy' alone does not guarantee the stats submodule is loaded;
# scipy.stats.entropy below needs the explicit submodule import.
import scipy.stats

samples = glob.glob(path.join(map_dir, '*', '1', '*'))

output_handler = open(output_file, 'w')
intermediate_handler = open(intermediate_file, 'w')
output_handler.write('\t'.join(['complex', 'resname', 'resregion', 'resnumber', 'respos',
                                'partner', 'prediction', 'target', 'entropy', 'crossentropy']) + '\n')
intermediate_handler.write('\t'.join(['complex', 'resname', 'resregion', 'resnumber', 'respos',
                                      'partner', 'embeddings']) + '\n')

# Build the embedding extractor once: it only depends on the fixed model, so
# re-creating it per sample (as before) wasted time and graph memory.
intermediate_model = Model(inputs=model.input, outputs=model.get_layer('layer1').output)

for sample_test in samples:
    try:
        print(sample_test)
        X, y, reg_type, res_pos, res_name, inter_info = load_map(sample_test)
    except Exception as e:
        logging.info("Bad interface!" + '\nError message: ' + str(e) +
                     "\nMore information:\n" + traceback.format_exc())
        continue

    X = np.array(X)
    y = np.array(y)
    # Cap the batch at 400 residues, presumably to bound memory -- the first
    # axis indexes residues of one complex.
    if X.shape[0] > 400:
        X = X[:400]
        y = y[:400]
        reg_type = reg_type[:400]
        res_pos = res_pos[:400]
        res_name = res_name[:400]
        inter_info = inter_info[:400]

    comp_name = path.basename(sample_test).replace('.pkl.lz4', '')
    y_preds = model.predict([X], batch_size=X.shape[0])
    intermediate_prediction = intermediate_model.predict([X], batch_size=X.shape[0])
    _ = gc.collect()

    for i in range(len(X)):
        if y[i].sum() != 1: # No class were assigned to the artificial amino acids like MSE
            continue
        output_handler.write('\t'.join([
            comp_name,
            res_name[i][0],
            reg_type[i],
            str(inter_info[i][2]),
            ','.join(str(v) for v in res_pos[i]),
            inter_info[i][3],
            ','.join(str(v) for v in y_preds[i]),
            ','.join(str(v) for v in y[i]),
            str(scipy.stats.entropy(y_preds[i])),
            str(log_loss(y[i], y_preds[i]))]) + '\n')
        intermediate_handler.write('\t'.join([
            comp_name,
            res_name[i][0],
            reg_type[i],
            str(inter_info[i][2]),
            ','.join(str(v) for v in res_pos[i]),
            inter_info[i][3],
            ','.join(str(v) for v in intermediate_prediction[i])]) + '\n')

# Close so the last rows are flushed; the originals were never closed.
output_handler.close()
intermediate_handler.close()
\ No newline at end of file
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Dec 13 21:09:56 2020
@author: yasser
"""
import logging
import os
import sys
import gc
from os import path, mkdir, getenv, listdir, remove, system, stat
import pandas as pd
import numpy as np
#from prody import *
import glob
import shutil