Commit f98ff46

Update algo for cocos

Signed-off-by: filip <filipcivljak007@gmail.com>
1 parent: 322585d

File tree

2 files changed (+46, -54 lines)

rul-turbofan/pred-model.py
Lines changed: 18 additions & 15 deletions
@@ -7,18 +7,21 @@
 import matplotlib.pyplot as plt
 from torch.utils.data import DataLoader, Dataset
 import os
-import zipfile
+
+# Set directories
+datasets_dir = 'datasets'  # Relative path to the datasets directory
+results_dir = 'results'  # Relative path to the results directory
 
 # Load datasets
-test_df = pd.read_csv('test_FD003.txt', sep=r'\s+', header=None)
-rul_df = pd.read_csv('RUL_FD003.txt', sep=r'\s+', header=None)
+test_df = pd.read_csv(os.path.join(datasets_dir, 'test_FD003.txt'), sep=r'\s+', header=None)
+rul_df = pd.read_csv(os.path.join(datasets_dir, 'RUL_FD003.txt'), sep=r'\s+', header=None)
 
-column_names = ['id', 'cycle'] + ['setting1', 'setting2', 'setting3'] + ['s' + str(i) for i in range(1, 22)]  # column names include 's1' to 's21', covering 21 columns in total
+column_names = ['id', 'cycle'] + ['setting1', 'setting2', 'setting3'] + ['s' + str(i) for i in range(1, 22)]
 test_df.columns = column_names
 rul_df.columns = ['RUL']
 
 # Load scaler
-scaler = joblib.load('scaler.pkl')
+scaler = joblib.load(os.path.join(datasets_dir, 'scaler.pkl'))
 cols_normalize = test_df.columns.difference(['id', 'cycle'])
 
 # Normalize test data
@@ -87,7 +90,6 @@ def forward(self, x):
 
         return out
 
-
 # Set device based on availability of CUDA (GPU)
 device = 'cuda' if torch.cuda.is_available() else 'cpu'
 
@@ -104,7 +106,7 @@ def forward(self, x):
 model = LSTMModel(input_dim, hidden_dim, num_layers, output_dim)
 
 # Load the trained model's state dictionary from a saved file
-model.load_state_dict(torch.load('model.pth', map_location=device))
+model.load_state_dict(torch.load(os.path.join(datasets_dir, 'model.pth'), map_location=device, weights_only=True))
 
 # Move the model to the appropriate device (GPU or CPU)
 model = model.to(device)
@@ -143,14 +145,15 @@ def visualize_and_save(df, save_path):
     plt.savefig(save_path)
     plt.close()
 
-output_file = 'result-plot.png'
-visualize_and_save(result_df, output_file)
+# Create results directory if it does not exist
+os.makedirs(results_dir, exist_ok=True)
 
-# Define the name for the zip file
-zip_filename = 'predictions.zip'
+# Save the result plot
+output_file = os.path.join(results_dir, 'result-plot.png')
+visualize_and_save(result_df, output_file)
 
-# Zip the file
-with zipfile.ZipFile(zip_filename, 'w') as zipf:
-    zipf.write(output_file, os.path.basename(output_file))
+# Save the result DataFrame as a CSV file
+result_csv = os.path.join(results_dir, 'prediction_results.csv')
+result_df.to_csv(result_csv, index=False)
 
-print(f"Plot saved as '{output_file}' and zipped successfully as '{zip_filename}'.")
+print(f"Plot saved as '{output_file}' and results saved as '{result_csv}'.")
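Taken together, the pred-model.py changes swap hard-coded file names and zip packaging for a datasets/results directory convention, and tighten model loading with weights_only=True, which restricts torch.load to plain tensor data instead of arbitrary pickled objects (available as an argument since PyTorch 1.13). A minimal sketch of the resulting I/O pattern, assuming the same directory names as the diff:

import os
import joblib
import torch

datasets_dir = 'datasets'  # read-only inputs: data files, scaler, trained model
results_dir = 'results'    # all outputs are written here

# Create the output directory up front; exist_ok makes reruns safe.
os.makedirs(results_dir, exist_ok=True)

# weights_only=True refuses to unpickle arbitrary Python objects, so a
# tampered model.pth cannot run code at load time.
state_dict = torch.load(os.path.join(datasets_dir, 'model.pth'),
                        map_location='cpu', weights_only=True)

scaler = joblib.load(os.path.join(datasets_dir, 'scaler.pkl'))

Separating read-only inputs from a writable results directory presumably mirrors how the Cocos environment named in the commit message mounts datasets and collects outputs.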

rul-turbofan/rul-training.py
Lines changed: 28 additions & 39 deletions
@@ -9,12 +9,18 @@
 import joblib
 import matplotlib.pyplot as plt
 import os
-import zipfile
+
+# Directory paths
+datasets_dir = 'datasets'
+results_dir = 'results'
+
+# Ensure the results directory exists
+os.makedirs(results_dir, exist_ok=True)
 
 # Load datasets
-train_df = pd.read_csv('train_FD001.txt', sep=r'\s+', header=None)
-test_df = pd.read_csv('test_FD001.txt', sep=r'\s+', header=None)
-rul_df = pd.read_csv('RUL_FD001.txt', sep=r'\s+', header=None)
+train_df = pd.read_csv(os.path.join(datasets_dir, 'train_FD001.txt'), sep=r'\s+', header=None)
+test_df = pd.read_csv(os.path.join(datasets_dir, 'test_FD001.txt'), sep=r'\s+', header=None)
+rul_df = pd.read_csv(os.path.join(datasets_dir, 'RUL_FD001.txt'), sep=r'\s+', header=None)
 
 # Set column names
 column_names = ['id', 'cycle'] + ['setting1', 'setting2', 'setting3'] + ['s' + str(i) for i in range(1, 22)]
@@ -34,7 +40,7 @@
 test_df[cols_normalize] = scaler.transform(test_df[cols_normalize])
 
 # Save the scaler
-joblib.dump(scaler, 'scaler.pkl')
+joblib.dump(scaler, os.path.join(results_dir, 'scaler.pkl'))
 
 # Dataset class
 class TurbofanDataset(Dataset):
@@ -55,37 +61,33 @@ def __getitem__(self, idx):
 
 # Definition of LSTM model
 class LSTMModel(nn.Module):
-
     def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
         super(LSTMModel, self).__init__()
-        self.hidden_dim = hidden_dim  # Number of features in the hidden state
-        self.num_layers = num_layers  # Number of recurrent layers in the LSTM
-        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True, dropout=0.5)  # LSTM layer
-        self.fc = nn.Linear(hidden_dim, output_dim)  # Fully connected layer for output
+        self.hidden_dim = hidden_dim
+        self.num_layers = num_layers
+        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True, dropout=0.5)
+        self.fc = nn.Linear(hidden_dim, output_dim)
 
     def forward(self, x):
-        # Initialize hidden state and cell state with zeros
         h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).to(x.device)
         c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).to(x.device)
-        # Get the output from the LSTM layer
         out, _ = self.lstm(x, (h0, c0))
-        # Pass the output of the last time step through the fully connected layer
         out = self.fc(out[:, -1, :])
         return out
 
 # Training settings
-sequence_length = 50  # Number of time steps in each sequence (experimentally chosen)
-input_dim = len(cols_normalize)  # Number of input features
-hidden_dim = 128  # Number of hidden units (experimentally chosen)
-num_layers = 3  # Number of LSTM layers (experimentally chosen)
-output_dim = 1  # Single output for RUL prediction
+sequence_length = 50
+input_dim = len(cols_normalize)
+hidden_dim = 128
+num_layers = 3
+output_dim = 1
 device = 'cuda' if torch.cuda.is_available() else 'cpu'
 
 # Model initialization and training settings
 model = LSTMModel(input_dim, hidden_dim, num_layers, output_dim)
 model = model.to(device)
 criterion = nn.MSELoss()
-optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-5)  # Adam optimizer with learning rate 0.0001 and weight decay applied to model parameters to prevent overfitting
+optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-5)
 
 # Training data
 train_dataset = TurbofanDataset(train_df, sequence_length)
@@ -96,16 +98,14 @@ def forward(self, x):
 val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, drop_last=True, num_workers=4)
 
 # Model training
-def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, target_r2_score=0.82):  # early stopping criteria, r2 score is set to 0.82
+def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, target_r2_score=0.82):
     train_losses = []
     val_losses = []
     val_r2_scores = []
     early_stopping_patience = 10
     early_stopping_counter = 0
     best_val_loss = float('inf')
-    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=3, factor=0.5)  # 'min': monitoring mode, learning rate will be adjusted based on minimizing validation loss
-    # patience: number of epochs with no improvement after which learning rate will be reduced
-    # factor: factor by which the learning rate will be reduced (new learning rate = old learning rate * factor)
+    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=3, factor=0.5)
 
     for epoch in range(num_epochs):
         model.train()
@@ -117,7 +117,7 @@ def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, target_r2_score=0.82):
             outputs = model(sequences)
             loss = criterion(outputs.squeeze(), targets)
             loss.backward()
-            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # maximum norm value beyond which gradients are clipped to prevent them from growing too large
+            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
             optimizer.step()
             running_loss += loss.item()
 
@@ -150,7 +150,7 @@ def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, target_r2_score=0.82):
         if val_loss < best_val_loss:
             best_val_loss = val_loss
             early_stopping_counter = 0
-            torch.save(model.state_dict(), 'model.pth')  # Save the best model
+            torch.save(model.state_dict(), os.path.join(results_dir, 'model.pth'))
         else:
             early_stopping_counter += 1
 
@@ -165,13 +165,12 @@ def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, target_r2_score=0.82):
     return train_losses, val_losses, val_r2_scores
 
 num_epochs = 100
-target_r2_score = 0.82  # r2 score is set to 0.82
+target_r2_score = 0.82
 train_losses, val_losses, val_r2_scores = train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, target_r2_score)
 
 def plot_training_history(train_losses, val_losses, val_r2_scores):
     epochs = range(1, len(train_losses) + 1)
 
-    # Plotting training and validation loss
     plt.figure(figsize=(14, 6))
 
     plt.subplot(1, 2, 1)
@@ -182,7 +181,6 @@ def plot_training_history(train_losses, val_losses, val_r2_scores):
     plt.ylabel('Loss')
     plt.legend()
 
-    # Plotting validation R2 score
    plt.subplot(1, 2, 2)
     plt.plot(epochs, val_r2_scores, 'g-', label='Validation R2 Score')
     plt.title('Validation R2 Score')
@@ -191,18 +189,9 @@ def plot_training_history(train_losses, val_losses, val_r2_scores):
     plt.legend()
 
     plt.tight_layout()
-    plt.savefig('training_history.png')
+    plt.savefig(os.path.join(results_dir, 'training_history.png'))
     plt.close()
 
 plot_training_history(train_losses, val_losses, val_r2_scores)
 
-# Create a zip file containing model.pth and training_history.png
-with zipfile.ZipFile('result.zip', 'w') as zipf:
-    zipf.write('model.pth')
-    zipf.write('training_history.png')
-
-# Cleanup
-os.remove('model.pth')
-os.remove('training_history.png')
-
-print("Zipped the model and training history plot into result.zip")
+print(f"Model and training history plot saved in '{results_dir}'")
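On the training side, the script now persists its three artifacts (scaler.pkl, the best model.pth checkpoint, and training_history.png) directly under results_dir instead of zipping them and deleting the originals. For reference, a condensed sketch of the best-model / early-stopping logic visible in the train_model hunks; the helper name and signature here are illustrative, not part of the source:

import os
import torch

def save_if_best(model, val_loss, best_val_loss, counter, results_dir, patience=10):
    """Checkpoint on improvement, otherwise count toward early stopping.

    Returns (best_val_loss, counter, should_stop).
    """
    if val_loss < best_val_loss:
        # Validation loss improved: save weights and reset the patience counter.
        torch.save(model.state_dict(), os.path.join(results_dir, 'model.pth'))
        return val_loss, 0, False
    counter += 1
    return best_val_loss, counter, counter >= patience

Because only the best checkpoint is ever written, the model.pth left in results_dir after early stopping is exactly the one pred-model.py later loads.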
