|
1 | 1 | import os
|
2 | 2 | import gzip
|
3 | 3 | import csv
|
| 4 | +import sys |
4 | 5 |
|
5 |
| -file_path = 'test_output.csv' |
def reduce_output_file_size(input_file: str, output_file: str) -> None:
    """
    Write a reduced, gzip-compressed copy of a CSV file.

    Simplifies the data generated by the analysis pipeline by retaining only
    the information required for the frontend: every column whose name starts
    with ``bval_`` is dropped, and the values in the columns ``f``, ``Dp``,
    ``D``, ``f_fitted``, ``Dp_fitted`` and ``D_fitted`` (when present) are
    rounded to 4 decimal places. All other columns are copied unchanged.

    If ``input_file`` does not exist, a message is printed and nothing is
    written. If it exists but is empty (no header row), an empty gzip file is
    produced.

    Args:
        input_file: Path of the CSV file to read.
        output_file: Path of the gzip-compressed CSV file to write.

    Raises:
        ValueError: If a non-blank value in one of the rounded columns
            cannot be parsed as a float.
    """
    if not os.path.exists(input_file):
        print(f"File '{input_file}' not found.")
        return

    columns_to_round = ['f', 'Dp', 'D', 'f_fitted', 'Dp_fitted', 'D_fitted']

    # newline='' on both files lets the csv module handle line endings itself,
    # which is required for quoted fields containing embedded newlines.
    with open(input_file, 'r', newline='') as infile, \
            gzip.open(output_file, 'wt', newline='') as outfile:
        reader = csv.DictReader(infile)

        # An empty input has no header row; there is nothing to copy.
        if reader.fieldnames is None:
            return

        # Drop b_values columns
        fieldnames = [
            field for field in reader.fieldnames
            if not field.startswith('bval_')
        ]

        # extrasaction='ignore' makes the writer skip the dropped bval_
        # columns as well as surplus fields of over-long rows (which
        # DictReader stores under the key None), so rows need no per-row
        # key deletion.
        writer = csv.DictWriter(
            outfile, fieldnames=fieldnames, extrasaction='ignore'
        )
        writer.writeheader()

        # The set of columns is fixed by the header, so decide once which
        # ones need rounding instead of re-checking for every row.
        present_round_columns = [
            column for column in columns_to_round if column in fieldnames
        ]

        for row in reader:
            # Round values in the remaining relevant columns
            for column in present_round_columns:
                value = row[column]
                # Blank cells and cells missing from short rows (None) are
                # passed through untouched rather than crashing float().
                if value is None or value == '':
                    continue
                row[column] = round(float(value), 4)
            writer.writerow(row)
| 35 | + |
if __name__ == '__main__':
    # Command-line entry point: exactly two positional arguments are
    # expected after the script name, the input CSV path and the output
    # (gzip) path.
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print("Usage: python reduce_output_size.py <input_file> <output_file>")
        sys.exit(1)

    input_file, output_file = cli_args
    reduce_output_file_size(input_file, output_file)
0 commit comments