
Potential memory leak with Tensorflow backend #10

@Freya-Ebba-Christ

Description


When running the LSTM decoder in ManyDecoders_FullData with Keras and the TF backend, I experience a memory leak. The problem is well known. What seems to work is to explicitly delete the model, clear the session, and call the garbage collector by adding

        del model_lstm
        K.clear_session()
        gc.collect()

Within the import section, I have also added

from keras.backend.tensorflow_backend import set_session
import tensorflow as tf

config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.compat.v1.Session(config=config)
set_session(sess)  # register the growth-enabled session with Keras

from keras import backend as K
import gc
K.clear_session()
gc.collect()

These changes also make it possible to share a GPU without taking precious GPU memory from other users/sessions.
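For completeness, on TensorFlow 2.x the same on-demand growth behavior can be requested without creating a session at all. A minimal sketch, assuming a TF 2.x install (not the code used in this repo):

import tensorflow as tf

# Ask TF to grow GPU memory on demand instead of reserving the whole card
# at startup; must run before any tensors are placed on the GPU.
for gpu in tf.config.list_physical_devices('GPU'):
    tf.config.experimental.set_memory_growth(gpu, True)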

For selecting the GPU (Nvidia only), I run

import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "1"  # use the id from $ nvidia-smi

alternatively,

from keras import backend as K
import tensorflow as tf

with K.tf.device('/gpu:1'):
    config = tf.ConfigProto(device_count={'GPU': 1})
    session = tf.Session(config=config)
    K.set_session(session)

should also work.
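The snippet above is TF 1.x style. As a hypothetical TF 2.x equivalent, assuming a machine with at least two GPUs, device pinning no longer needs sessions or ConfigProto; a minimal sketch:

import tensorflow as tf

# Pin tensor creation and layer compute to the second GPU.
with tf.device('/GPU:1'):
    x = tf.random.normal((4, 8))        # created on GPU:1
    y = tf.keras.layers.Dense(2)(x)     # weights and compute on GPU:1
print(y.device)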

##### LSTM ######
if run_lstm:
    ### Get hyperparameters using Bayesian optimization based on validation set R2 values###

    #Define a function that returns the metric we are trying to optimize (R2 value of the validation set)
    #as a function of the hyperparameter we are fitting        
    def lstm_evaluate(num_units,frac_dropout,n_epochs):
        num_units=int(num_units)
        frac_dropout=float(frac_dropout)
        n_epochs=int(n_epochs)
        model_lstm=LSTMDecoder(units=num_units,dropout=frac_dropout,num_epochs=n_epochs)
        model_lstm.fit(X_train,y_train)
        y_valid_predicted_lstm=model_lstm.predict(X_valid)
        
        del model_lstm
        K.clear_session()
        gc.collect()

        return np.mean(get_R2(y_valid,y_valid_predicted_lstm))
    
    #Do bayesian optimization

    lstmBO = BayesianOptimization(lstm_evaluate, {'num_units': (50, 600), 'frac_dropout': (0,.5), 'n_epochs': (2,21)})
    lstmBO.maximize(init_points=20, n_iter=20, kappa=10)
    best_params=lstmBO.res['max']['max_params']
    frac_dropout=float(best_params['frac_dropout'])
    n_epochs=int(best_params['n_epochs'])
    num_units=int(best_params['num_units'])

    # Run model w/ above hyperparameters
    
    model_lstm=LSTMDecoder(units=num_units,dropout=frac_dropout,num_epochs=n_epochs)
    model_lstm.fit(X_train,y_train)
    y_test_predicted_lstm=model_lstm.predict(X_test)
    mean_r2_lstm[i]=np.mean(get_R2(y_test,y_test_predicted_lstm))    
    #Print test set R2
    R2s_lstm=get_R2(y_test,y_test_predicted_lstm)
    print('R2s:', R2s_lstm)   
    #Add predictions of training/validation/testing to lists (for saving)        
    y_pred_lstm_all.append(y_test_predicted_lstm)
    y_train_pred_lstm_all.append(model_lstm.predict(X_train))
    y_valid_pred_lstm_all.append(model_lstm.predict(X_valid))
    
    del model_lstm
    K.clear_session()
    gc.collect()
   
print ("\n") #Line break after each fold   
time_elapsed=time.time()-t1 #How much time has passed
