Enhancements

AhmedBasem20 · AhmedBasem20 · commit 469e73f1882e · 2024-08-09T21:38:15.000+03:00
diff --git a/.github/workflows/website.yml b/.github/workflows/website.yml
@@ -47,7 +47,7 @@ jobs:
           name: 'Data'
 
       - name: 'Filter and compress results file.'
-        run: python utilities/reduce_output_size.py
+        run: python utilities/reduce_output_size.py test_output.csv test_output.csv.gz
 
       - name: move data to the dashboard folder
         run: |
diff --git a/utilities/reduce_output_size.py b/utilities/reduce_output_size.py
@@ -1,26 +1,43 @@
 import os
 import gzip
 import csv
+import sys
 
-file_path = 'test_output.csv'
+def reduce_output_file_size(input_file:str, output_file:str):
+    """
+    Reduce the analysis pipeline's CSV output for the frontend: drop the 'bval_' columns, round the parameter columns to 4 decimals, and write the result as a gzip-compressed CSV.
+    """
+    if os.path.exists(input_file):
+        # Open the input and output files
+        with open(input_file, 'r') as infile, gzip.open(output_file, 'wt', newline='') as outfile:
+            reader = csv.DictReader(infile)
 
-# Check if the file exists
-if os.path.exists(file_path):
-    # Open the input and output files
-    with open(file_path, 'r') as infile, gzip.open('test_output.csv.gz', 'wt', newline='') as outfile:
-        reader = csv.DictReader(infile)
+            # Exclude the 'bval_' (b-value) columns from the output header
+            fieldnames = [field for field in reader.fieldnames if not field.startswith('bval_')]
+            writer = csv.DictWriter(outfile, fieldnames=fieldnames)
+            writer.writeheader()
 
-        # Drop b_values columns
-        fieldnames = [field for field in reader.fieldnames if not field.startswith('bval_')]
-        writer = csv.DictWriter(outfile, fieldnames=fieldnames)
-        writer.writeheader()
+            columns_to_round = ['f', 'Dp', 'D', 'f_fitted', 'Dp_fitted', 'D_fitted']
 
-        columns_to_round = ['f', 'Dp', 'D', 'f_fitted', 'Dp_fitted', 'D_fitted']
-        
-        # Process each row
-        for row in reader:
-            filtered_row = {column: row[column] for column in fieldnames}
-            for column in columns_to_round:
-                if column in filtered_row:
-                    filtered_row[column] = round(float(filtered_row[column]), 4)
-            writer.writerow(filtered_row)
+            for row in reader:
+                # Delete columns starting with 'bval_' so the row matches the header
+                for key in list(row.keys()):
+                    if key.startswith('bval_'):
+                        del row[key]
+
+                # Round values in the remaining relevant columns
+                for column in columns_to_round:
+                    if column in row:
+                        row[column] = round(float(row[column]), 4)
+                writer.writerow(row)
+    else:
+        print(f"File '{input_file}' not found.")
+
+if __name__ == '__main__':
+    if len(sys.argv) != 3:
+        print("Usage: python reduce_output_size.py <input_file> <output_file>")
+        sys.exit(1)
+    
+    input_file = sys.argv[1]
+    output_file = sys.argv[2]
+    reduce_output_file_size(input_file, output_file)