Updates

e07912dd · DLA-Ranker · 1e0c8874 · e07912dd · e07912dd
Commit e07912dd authored Apr 09, 2022 by DLA-Ranker
Hide whitespace changes
Inline Side-by-side

Showing with 160 additions and 2 deletions

generate_cubes_reduce_channels_multiproc.py Representation/generate_cubes_reduce_channels_multiproc.py +117 -0

train.py Train/train.py +43 -2

No files found.
--- a/Representation/generate_cubes_reduce_channels_multiproc.py
+++ b/Representation/generate_cubes_reduce_channels_multiproc.py
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Jan 28 16:00:07 2022
+
+@author: mohseni
+"""
+
+import glob
+import numpy as np
+from os import path, mkdir, getenv, listdir, remove, system, stat
+from subprocess import CalledProcessError, check_call
+import pickle
+import sys
+
+sys.path.insert(1, '../lib/')
+import tools as tl
+
+# Atom names per residue type, keyed by three-letter residue code (20 entries).
+# The lists are concatenated, in this order, into `all_channels` below; together
+# they hold 167 names, one per atom channel of the input maps (the extra
+# channels copied in process_sample start at index 167).
+channels = {'ALA':['C','N','O','CA','CB'], 
+            'ARG':['C','N','O','CA','CB','CG','CD','NE','CZ','NH1','NH2'], 
+            'ASN':['C','N','O','CA','CB','CG','ND2','OD1'], 
+            'ASP':['C','N','O','CA','CB','CG','OD1','OD2'], 
+            'CYS':['C','N','O','CA','CB','SG'], 
+            'GLN':['C','N','O','CA','CB','CG','CD','NE2','OE1'], 
+            'GLU':['C','N','O','CA','CB','CG','CD','OE1','OE2'], 
+            'GLY':['C','N','O','CA'], 
+            'HIS':['C','N','O','CA','CB','CG','CD2','ND1','CE1','NE2'], 
+            'ILE':['C','N','O','CA','CB','CG1','CG2','CD1'], 
+            'LEU':['C','N','O','CA','CB','CG','CD1','CD2'], 
+            'LYS':['C','N','O','CA','CB','CG','CD','CE','NZ'], 
+            'MET':['C','N','O','CA','CB','CG','SD','CE'], 
+            'PHE':['C','N','O','CA','CB','CG','CD1','CD2','CE1','CE2','CZ'], 
+            'PRO':['C','N','O','CA','CB','CG','CD'], 
+            'SER':['C','N','O','CA','CB','OG'], 
+            'THR':['C','N','O','CA','CB','CG2','OG1'], 
+            'TRP':['C','N','O','CA','CB','CG','CD1','CD2','CE2','CE3','NE1','CZ2','CZ3','CH2'], 
+            'TYR':['C','N','O','CA','CB','CG','CD1','CD2','CE1','CE2','CZ','OH'], 
+            'VAL':['C','N','O','CA','CB','CG1','CG2']}
+
+def save_obj(obj, name):
+    """Pickle ``obj`` into the file ``name + '.pkl'``.
+
+    The highest pickle protocol available to the interpreter is used.
+    """
+    pkl_path = name + '.pkl'
+    with open(pkl_path, 'wb') as handle:
+        pickle.dump(obj, handle, protocol=pickle.HIGHEST_PROTOCOL)
+
+def load_obj(name):
+    """Return the object unpickled from the file ``name + '.pkl'``.
+
+    NOTE: unpickling can execute arbitrary code, so this must only be used on
+    files from a trusted source.
+    """
+    pkl_path = name + '.pkl'
+    with open(pkl_path, 'rb') as handle:
+        loaded = pickle.load(handle)
+    return loaded
+
+def load_map(file_path):
+    """Decompress an lz4-compressed pickle and return the object stored in it.
+
+    ``file_path`` is expected to end in ``.pkl.lz4``. The external ``lz4``
+    command is run on it, and the code relies on the decompressed data ending
+    up at ``file_path`` without the ``.lz4`` suffix. That temporary ``.pkl``
+    is deleted before returning, including when unpickling fails.
+
+    Raises ``CalledProcessError`` when ``lz4`` exits with a non-zero status;
+    errors from unpickling propagate unchanged.
+    """
+    check_call(
+        [
+            'lz4', '-d', '-f',
+            file_path
+        ],
+        stdout=sys.stdout)
+    try:
+        return load_obj(file_path.replace('.pkl.lz4',''))
+    finally:
+        # Always drop the decompressed copy so that a corrupt pickle does not
+        # leave a stray .pkl file behind.
+        remove(file_path.replace('.lz4',''))
+
+def save_map(tuple_obj, file_path):
+    """Pickle ``tuple_obj`` to ``file_path + '.pkl'`` and compress it with lz4.
+
+    The intermediate ``.pkl`` file is removed once the ``lz4`` command has
+    returned successfully. ``CalledProcessError`` is raised if it fails.
+    """
+    pkl_path = file_path + '.pkl'
+    save_obj(tuple_obj, file_path)
+    # lz4's own '--rm' flag is deliberately not used because it is not handled
+    # consistently across lz4 versions; the cleanup is done explicitly below.
+    compress_cmd = ['lz4', '-f', pkl_path]
+    check_call(compress_cmd, stdout=sys.stdout)
+    remove(pkl_path)
+
+v_dim = 24
+# 10 output channels: process_sample fills indices 0-3 with the per-element
+# sums and indices 4-9 with the six channels it copies over.
+n_channels = 4 + 4 + 2
+
+# Flat list of every atom name, in the iteration order of `channels`.
+all_channels = [atom for atom_names in channels.values() for atom in atom_names]
+
+# Positions in `all_channels` grouped by the first letter of the atom name.
+C_index = [idx for idx, atom in enumerate(all_channels) if atom[0] == "C"]
+O_index = [idx for idx, atom in enumerate(all_channels) if atom[0] == "O"]
+N_index = [idx for idx, atom in enumerate(all_channels) if atom[0] == "N"]
+S_index = [idx for idx, atom in enumerate(all_channels) if atom[0] == "S"]
+
+# Every compressed map found two directory levels below the map directory.
+samples = glob.glob(path.join('../Examples/map_dir', '*', '*', '*.lz4'))
+
+
+def process_sample(sample, report_dict):
+    """Rewrite one compressed map with its atom channels merged per element.
+
+    The first element of the loaded tuple is indexed as a 5-D array whose last
+    axis holds the channels. The channels listed in ``C_index``, ``N_index``,
+    ``O_index`` and ``S_index`` are summed into output channels 0-3, and input
+    channels 167-172 are copied to output channels 4-9. All other elements of
+    the tuple are carried over unchanged. The result is saved through
+    ``save_map`` under the input path with ``.pkl.lz4`` replaced by ``_``.
+
+    The input file is deleted only after the reduced map has been written.
+    If any step fails, the error is reported on stderr and the input is left
+    in place, so a failed run (or a second run over already-reduced maps)
+    does not silently destroy data.
+
+    sample      -- path of a ``.pkl.lz4`` map file.
+    report_dict -- not used by this function; presumably supplied by
+                   ``tl.do_processing`` (verify against that helper).
+
+    Returns None.
+    """
+    try:
+        tuple_obj = load_map(sample)
+        X = tuple_obj[0]
+        X_new = np.zeros(X.shape[:-1] + (n_channels,))
+
+        # Collapse the per-atom channels into one channel per element.
+        X_new[:,:,:,:,0] = X[:,:,:,:,C_index].sum(axis=4)
+        X_new[:,:,:,:,1] = X[:,:,:,:,N_index].sum(axis=4)
+        X_new[:,:,:,:,2] = X[:,:,:,:,O_index].sum(axis=4)
+        X_new[:,:,:,:,3] = X[:,:,:,:,S_index].sum(axis=4)
+
+        # The six channels that follow the 167 atom channels are kept as is.
+        for i in range(6):
+            X_new[:,:,:,:,i+4] = X[:,:,:,:,167+i]
+
+        tuple_obj_new = (X_new,) + tuple(tuple_obj[1:])
+        save_map(tuple_obj_new, sample.replace('.pkl.lz4', '_'))
+    except Exception as err:
+        # Best effort: skip this sample, but say so and keep the original.
+        print('Could not reduce {}: {!r}'.format(sample, err), file=sys.stderr)
+        return
+    remove(sample)
+    return
+
+# One single-element argument tuple per sample, as handed to tl.do_processing.
+cases = [(sample_path,) for sample_path in samples]
+report_dict = tl.do_processing(cases, process_sample, True)
--- a/Train/train.py
+++ b/Train/train.py
@@ -68,6 +68,9 @@ hidden_size1 = 200
 hidden_size2 = 20
 v_dim = 24

+# Number of atom channels in the input maps; 6 further channels are added to
+# this when the model input shape is built. Setting it to 4 selects
+# Conv_3D_model_4channels instead of Conv_3D_model (presumably for the maps
+# written by generate_cubes_reduce_channels_multiproc.py -- confirm).
+atom_channels = 167
+#atom_channels = 4
+
 logging.basicConfig(filename='manager.log', filemode='w', format='%(levelname)s: %(message)s', level=logging.DEBUG)
 mainlog = logging.getLogger('main')
 logging.Logger
@@ -102,9 +105,42 @@ else:
 encoder = OneHotEncoder(sparse=False, handle_unknown='ignore')
 onehot = encoder.fit(np.asarray([['S'], ['C'], ['R']]))

+
 def Conv_3D_model(input_shape, input_shape_aux):
+    """Build, compile and return the two-input 3-D convolutional classifier.
+
+    ``input_shape`` is the shape of the volumetric input and
+    ``input_shape_aux`` the shape of the auxiliary input that is concatenated
+    with the flattened convolutional features. The model ends in a single
+    sigmoid unit and is compiled with binary cross-entropy and Adam. The model
+    summary is printed as a side effect.
+    """
    X_in = Input(shape=input_shape)
    aux_input = Input(shape=input_shape_aux)
+
+    # A linear 1x1x1 convolution first mixes the input channels down to 20
+    # feature maps before any spatial filtering.
+    # NOTE(review): the input_shape keyword on layers that are called on a
+    # tensor looks redundant in the functional API -- confirm.
+    H = Conv3D(20, kernel_size=(1, 1, 1), use_bias = True, padding = 'valid', activation='linear', kernel_initializer='he_uniform', input_shape=X_in.shape)(X_in)
+    H = BatchNormalization()(H)    
+    H = Conv3D(20, kernel_size=(3, 3, 3), use_bias = True, padding = 'valid', activation='elu', kernel_initializer='he_uniform', input_shape=H.shape)(H)
+    H = BatchNormalization()(H)
+    H = Conv3D(30, kernel_size=(4, 4, 4), use_bias = True, padding = 'valid', activation='elu', kernel_initializer='he_uniform', input_shape=H.shape)(H)
+    H = BatchNormalization()(H)
+    H = Conv3D(20, kernel_size=(4, 4, 4), use_bias = True, padding = 'valid', activation='elu', kernel_initializer='he_uniform', input_shape=H.shape)(H)
+    H = BatchNormalization()(H)
+    H = AveragePooling3D(pool_size=(4, 4, 4), strides=(4, 4, 4))(H)
+    H = Flatten()(H)
+    H = Dropout(0.4)(H)
+    
+    # Join the convolutional features with the auxiliary input.
+    H = Concatenate()([H, aux_input])
+    
+    H = Dense(hidden_size1, activation='elu', name='layer1', kernel_constraint=max_norm(4), bias_constraint=max_norm(4))(H)
+    H = Dropout(0.2)(H)
+    
+    H = Dense(hidden_size2, activation='elu', name='layer2', kernel_constraint=max_norm(4), bias_constraint=max_norm(4))(H)
+    H = Dropout(0.1)(H)
+    
+    Y = Dense(1, activation='sigmoid')(H)
+
+    _model = Model(inputs=[X_in, aux_input], outputs=Y)
+    # NOTE(review): newer Keras releases name this argument `learning_rate`
+    # rather than `lr` -- confirm against the installed version.
+    _model.compile(loss='binary_crossentropy', optimizer=Adam(lr=0.001))
+    _model.summary()
+    return _model
+
+
+def Conv_3D_model_4channels(input_shape, input_shape_aux):
+    X_in = Input(shape=input_shape)
+    aux_input = Input(shape=input_shape_aux)
    
    H = Conv3D(10, kernel_size=(3, 3, 3), use_bias = True, padding = 'valid', activation='elu', kernel_initializer='he_uniform', input_shape=X_in.shape)(X_in)
    H = BatchNormalization()(H)
@@ -164,8 +200,13 @@ d_class_weights = dict(enumerate(class_weights))
 for foldk in ['Total']:
    seed(int(np.round(np.random.random()*10)))
    
-    input_shape=(v_dim,v_dim,v_dim,4+6)
-    model  = Conv_3D_model(input_shape, 3)
+    input_shape=(v_dim,v_dim,v_dim,atom_channels+6)
+    
+    if atom_channels == 4:
+        model  = Conv_3D_model_4channels(input_shape, 3)
+    else:
+        model  = Conv_3D_model(input_shape, 3)
+    
    #model = load_model('Total_0_model')
            
    with open(str(foldk) + '_train_interfaces.txt', 'w') as f_handler_trainlist: