Commit e07912dd by DLA-Ranker

Updates

parent 1e0c8874
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Jan 28 16:00:07 2022
@author: mohseni
"""
import glob
import numpy as np
from os import path, mkdir, getenv, listdir, remove, system, stat
from subprocess import CalledProcessError, check_call
import pickle
import sys
sys.path.insert(1, '../lib/')
import tools as tl
channels = {'ALA':['C','N','O','CA','CB'],
            'ARG':['C','N','O','CA','CB','CG','CD','NE','CZ','NH1','NH2'],
            'ASN':['C','N','O','CA','CB','CG','ND2','OD1'],
            'ASP':['C','N','O','CA','CB','CG','OD1','OD2'],
            'CYS':['C','N','O','CA','CB','SG'],
            'GLN':['C','N','O','CA','CB','CG','CD','NE2','OE1'],
            'GLU':['C','N','O','CA','CB','CG','CD','OE1','OE2'],
            'GLY':['C','N','O','CA'],
            'HIS':['C','N','O','CA','CB','CG','CD2','ND1','CE1','NE2'],
            'ILE':['C','N','O','CA','CB','CG1','CG2','CD1'],
            'LEU':['C','N','O','CA','CB','CG','CD1','CD2'],
            'LYS':['C','N','O','CA','CB','CG','CD','CE','NZ'],
            'MET':['C','N','O','CA','CB','CG','SD','CE'],
            'PHE':['C','N','O','CA','CB','CG','CD1','CD2','CE1','CE2','CZ'],
            'PRO':['C','N','O','CA','CB','CG','CD'],
            'SER':['C','N','O','CA','CB','OG'],
            'THR':['C','N','O','CA','CB','CG2','OG1'],
            'TRP':['C','N','O','CA','CB','CG','CD1','CD2','CE2','CE3','NE1','CZ2','CZ3','CH2'],
            'TYR':['C','N','O','CA','CB','CG','CD1','CD2','CE1','CE2','CZ','OH'],
            'VAL':['C','N','O','CA','CB','CG1','CG2']}
def save_obj(obj, name):
    # Pickle an object to <name>.pkl
    with open(name + '.pkl', 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

def load_obj(name):
    # Load a pickled object from <name>.pkl
    with open(name + '.pkl', 'rb') as f:
        return pickle.load(f)
def load_map(file_path):
    # Decompress <name>.pkl.lz4 with the lz4 CLI, unpickle it, then delete the
    # temporary .pkl file.
    check_call(
        [
            'lz4', '-d', '-f',
            file_path
        ],
        stdout=sys.stdout)
    tuple_obj = load_obj(file_path.replace('.pkl.lz4', ''))
    remove(file_path.replace('.lz4', ''))
    return tuple_obj

def save_map(tuple_obj, file_path):
    # Pickle the tuple to <file_path>.pkl, compress it with the lz4 CLI, then
    # delete the uncompressed .pkl file.
    save_obj(tuple_obj, file_path)
    check_call(
        [
            'lz4', '-f',  # no '--rm' here because of inconsistencies between lz4 versions!
            file_path + '.pkl'
        ],
        stdout=sys.stdout)
    remove(file_path + '.pkl')
v_dim = 24
n_channels = 4 + 4 + 2  # 4 element channels (C, N, O, S) + 6 extra feature channels

# Flatten the per-residue atom lists into a single list of 167 atom-type channels.
all_channels = []
for aa, a_vector in channels.items():
    all_channels += a_vector

# Group the atom-type channel indices by chemical element.
C_index, O_index, N_index, S_index = [], [], [], []
for i, a in enumerate(all_channels):
    if a[0] == "C":
        C_index.append(i)
    if a[0] == "O":
        O_index.append(i)
    if a[0] == "N":
        N_index.append(i)
    if a[0] == "S":
        S_index.append(i)

samples = glob.glob(path.join('../Examples/map_dir', '*', '*', '*.lz4'))
def process_sample(sample, report_dict):
    try:
        tuple_obj = load_map(sample)
        n = len(tuple_obj)
        X = tuple_obj[0]
        # Collapse the 167 atom-type channels into 4 element channels (C, N, O, S)
        # and carry over the 6 trailing feature channels.
        X_new = np.zeros(X.shape[:-1] + tuple([n_channels]))
        X_new[:,:,:,:,0] = X[:,:,:,:,C_index].sum(axis=4)
        X_new[:,:,:,:,1] = X[:,:,:,:,N_index].sum(axis=4)
        X_new[:,:,:,:,2] = X[:,:,:,:,O_index].sum(axis=4)
        X_new[:,:,:,:,3] = X[:,:,:,:,S_index].sum(axis=4)
        for i in range(6):
            X_new[:,:,:,:,i+4] = X[:,:,:,:,167+i]
        # Keep the remaining elements of the original tuple unchanged.
        tuple_obj_new = (X_new,)
        for i in range(1, n):
            tuple_obj_new += (tuple_obj[i],)
        save_map(tuple_obj_new, sample.replace('.pkl.lz4', '_'))
    except Exception:
        # Skip samples that fail to load or convert.
        pass
    remove(sample)
    return
cases = []
for sample in samples:
    cases.append((sample,))
report_dict = tl.do_processing(cases, process_sample, True)
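Side note (not part of the commit): a minimal, self-contained sketch of the channel reduction that process_sample performs. The 167 atom-type channels are summed into 4 element channels (C, N, O, S) and the 6 trailing feature channels are copied through, giving 4 + 6 = 10 channels per voxel. The element index ranges below are placeholders; in the script they are derived from the channels dictionary.

import numpy as np

v_dim, n_atom_types, n_extra = 24, 167, 6
rng = np.random.default_rng(0)
X = rng.random((1, v_dim, v_dim, v_dim, n_atom_types + n_extra))  # hypothetical input map

# Placeholder element index lists; the real ones are built from the channels dict above.
C_index = list(range(0, 80))
N_index = list(range(80, 110))
O_index = list(range(110, 150))
S_index = list(range(150, 167))

X_new = np.zeros(X.shape[:-1] + (4 + n_extra,))
X_new[..., 0] = X[..., C_index].sum(axis=-1)
X_new[..., 1] = X[..., N_index].sum(axis=-1)
X_new[..., 2] = X[..., O_index].sum(axis=-1)
X_new[..., 3] = X[..., S_index].sum(axis=-1)
X_new[..., 4:] = X[..., n_atom_types:]   # copy the 6 trailing feature channels

print(X_new.shape)  # (1, 24, 24, 24, 10)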
@@ -68,6 +68,9 @@ hidden_size1 = 200
 hidden_size2 = 20
 v_dim = 24
+atom_channels = 167
+#atom_channels = 4
 logging.basicConfig(filename='manager.log', filemode='w', format='%(levelname)s: %(message)s', level=logging.DEBUG)
 mainlog = logging.getLogger('main')
 logging.Logger
@@ -102,9 +105,42 @@ else:
 encoder = OneHotEncoder(sparse=False, handle_unknown='ignore')
 onehot = encoder.fit(np.asarray([['S'], ['C'], ['R']]))
 def Conv_3D_model(input_shape, input_shape_aux):
     X_in = Input(shape=input_shape)
     aux_input = Input(shape=input_shape_aux)
+    H = Conv3D(20, kernel_size=(1, 1, 1), use_bias = True, padding = 'valid', activation='linear', kernel_initializer='he_uniform', input_shape=X_in.shape)(X_in)
+    H = BatchNormalization()(H)
+    H = Conv3D(20, kernel_size=(3, 3, 3), use_bias = True, padding = 'valid', activation='elu', kernel_initializer='he_uniform', input_shape=H.shape)(H)
+    H = BatchNormalization()(H)
+    H = Conv3D(30, kernel_size=(4, 4, 4), use_bias = True, padding = 'valid', activation='elu', kernel_initializer='he_uniform', input_shape=H.shape)(H)
+    H = BatchNormalization()(H)
+    H = Conv3D(20, kernel_size=(4, 4, 4), use_bias = True, padding = 'valid', activation='elu', kernel_initializer='he_uniform', input_shape=H.shape)(H)
+    H = BatchNormalization()(H)
+    H = AveragePooling3D(pool_size=(4, 4, 4), strides=(4, 4, 4))(H)
+    H = Flatten()(H)
+    H = Dropout(0.4)(H)
+    H = Concatenate()([H, aux_input])
+    H = Dense(hidden_size1, activation='elu', name='layer1', kernel_constraint=max_norm(4), bias_constraint=max_norm(4))(H)
+    H = Dropout(0.2)(H)
+    H = Dense(hidden_size2, activation='elu', name='layer2', kernel_constraint=max_norm(4), bias_constraint=max_norm(4))(H)
+    H = Dropout(0.1)(H)
+    Y = Dense(1, activation='sigmoid')(H)
+    _model = Model(inputs=[X_in, aux_input], outputs=Y)
+    _model.compile(loss='binary_crossentropy', optimizer=Adam(lr=0.001))
+    _model.summary()
+    return _model
+def Conv_3D_model_4channels(input_shape, input_shape_aux):
+    X_in = Input(shape=input_shape)
+    aux_input = Input(shape=input_shape_aux)
     H = Conv3D(10, kernel_size=(3, 3, 3), use_bias = True, padding = 'valid', activation='elu', kernel_initializer='he_uniform', input_shape=X_in.shape)(X_in)
     H = BatchNormalization()(H)
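Side note (not part of the commit): a quick size check for the new Conv_3D_model added above, assuming stride 1 and 'valid' padding for every Conv3D. A 24^3 input shrinks to 16^3 after the four convolutions, the 4x4x4 average pooling reduces it to 4^3, and with 20 filters in the last convolution Flatten() sees 4^3 * 20 = 1280 features before the auxiliary input is concatenated.

# Rough size bookkeeping for Conv_3D_model, assuming stride 1 and 'valid' padding.
def valid_conv(size, kernel):
    return size - kernel + 1

size = 24                       # v_dim
for k in (1, 3, 4, 4):          # kernel sizes of the four Conv3D layers
    size = valid_conv(size, k)  # 24 -> 24 -> 22 -> 19 -> 16
size //= 4                      # AveragePooling3D(pool_size=4, strides=4): 16 -> 4
print(size, size**3 * 20)       # 4, 1280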
@@ -164,8 +200,13 @@ d_class_weights = dict(enumerate(class_weights))
 for foldk in ['Total']:
     seed(int(np.round(np.random.random()*10)))
-    input_shape=(v_dim,v_dim,v_dim,4+6)
-    model = Conv_3D_model(input_shape, 3)
+    input_shape=(v_dim,v_dim,v_dim,atom_channels+6)
+    if atom_channels == 4:
+        model = Conv_3D_model_4channels(input_shape, 3)
+    else:
+        model = Conv_3D_model(input_shape, 3)
     #model = load_model('Total_0_model')
     with open(str(foldk) + '_train_interfaces.txt', 'w') as f_handler_trainlist:
...
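Side note (an interpretation, not text from the commit): the two halves of this commit appear to be linked through the channel layout. The conversion script rewrites each map to 4 element channels plus the 6 trailing feature channels, while the training script keeps atom_channels = 167 for the original maps; whichever maps are used, atom_channels has to match them, since input_shape = (v_dim, v_dim, v_dim, atom_channels + 6) and the model builder is chosen by the same switch. A hypothetical helper (pick_builder is not in the repository) that infers the matching builder from a map's channel dimension:

import numpy as np

def pick_builder(X: np.ndarray) -> str:
    # Strip the 6 trailing feature channels to recover the atom-channel count.
    atom_channels = X.shape[-1] - 6
    if atom_channels == 4:
        return "Conv_3D_model_4channels"
    if atom_channels == 167:
        return "Conv_3D_model"
    raise ValueError(f"unexpected channel count: {X.shape[-1]}")

print(pick_builder(np.zeros((1, 24, 24, 24, 10))))   # Conv_3D_model_4channels
print(pick_builder(np.zeros((1, 24, 24, 24, 173))))  # Conv_3D_model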