-
Notifications
You must be signed in to change notification settings - Fork 0
Fix continuous RTSTRUCT index bug #75
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
95586e5
5193fb4
a0771c6
cf26afc
e643349
f5882da
5495550
decf8e5
fcc1b9e
a708182
d706863
d63a1c5
484c12e
c6b945f
fc83d69
46f0773
fe56257
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,3 @@ | ||
# read version from installed package | ||
from importlib.metadata import version | ||
__version__ = "1.18.0" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
import pandas as pd | ||
from typing import Optional | ||
import matplotlib.pyplot as plt | ||
import seaborn as sns | ||
import numpy as np | ||
from scipy.linalg import issymmetric | ||
|
||
|
||
def getFeatureCorrelations(vertical_features:pd.DataFrame,
                           horizontal_features:pd.DataFrame,
                           method:str = "pearson",
                           vertical_feature_name:Optional[str] = "",
                           horizontal_feature_name:Optional[str] = ""):
    """Compute the correlation matrix for the union of two feature sets.

    The two dataframes are inner-joined on their shared index, then the
    pairwise correlation of every resulting column is calculated.

    Parameters
    ----------
    vertical_features : pd.DataFrame
        First feature set. Its index must match that of horizontal_features.
    horizontal_features : pd.DataFrame
        Second feature set. Its index must match that of vertical_features.
    method : str
        Correlation method: "pearson", "spearman", or "kendall". Default is "pearson".
    vertical_feature_name : str
        Suffix applied to vertical column names that clash in the join. Default is "".
    horizontal_feature_name : str
        Suffix applied to horizontal column names that clash in the join. Default is "".

    Returns
    -------
    correlation_matrix : pd.DataFrame
        Square dataframe of correlation values over all joined columns.
    """
    # Inputs must be dataframes, not Series or raw arrays
    assert isinstance(vertical_features, pd.DataFrame), "vertical_features must be a pandas DataFrame"
    assert isinstance(horizontal_features, pd.DataFrame), "horizontal_features must be a pandas DataFrame"

    if method not in ["pearson", "spearman", "kendall"]:
        raise ValueError("Correlation method must be one of 'pearson', 'spearman', or 'kendall'.")

    if not vertical_features.index.equals(horizontal_features.index):
        raise ValueError("Vertical and horizontal features must have the same index to calculate correlation. Set the index to the intersection of patient IDs.")

    # Non-blank set names become "_name" suffixes on clashing column names
    vertical_suffix = f"_{vertical_feature_name}" if vertical_feature_name else ""
    horizontal_suffix = f"_{horizontal_feature_name}" if horizontal_feature_name else ""

    # Inner join keeps only the rows present in both feature sets
    features_to_correlate = vertical_features.join(horizontal_features,
                                                   how='inner',
                                                   lsuffix=vertical_suffix,
                                                   rsuffix=horizontal_suffix)

    try:
        # Correlate every column against every other column
        return features_to_correlate.corr(method=method)
    except Exception as e:
        raise ValueError(f"Error calculating correlation matrix: {e}")
|
||
|
||
def plotCorrelationHeatmap(correlation_matrix_df:pd.DataFrame,
                           diagonal:Optional[bool] = False,
                           triangle:Optional[str] = "lower",
                           cmap:Optional[str] = "nipy_spectral",
                           xlabel:Optional[str] = "",
                           ylabel:Optional[str] = "",
                           title:Optional[str] = "",
                           subtitle:Optional[str] = "",
                           show_tick_labels:Optional[bool] = False
                           ):
    """Render a correlation matrix as a Seaborn heatmap.

    Parameters
    ----------
    correlation_matrix_df : pd.DataFrame
        Correlation matrix to plot.
    diagonal : bool, optional
        If True, plot only one triangular half of the matrix. The default is False.
    triangle : str, optional
        Which triangular half of the matrix to plot when diagonal is True:
        "lower" or "upper". The default is "lower".
    cmap : str, optional
        Matplotlib colormap name for the heatmap. The default is "nipy_spectral".
    xlabel : str, optional
        Label for the x-axis. The default is "".
    ylabel : str, optional
        Label for the y-axis. The default is "".
    title : str, optional
        Super-title for the plot. Falls back to "Correlation Heatmap" when blank.
    subtitle : str, optional
        Subtitle shown beneath the title. The default is "".
    show_tick_labels : bool, optional
        Whether to show the per-feature tick labels on the axes. The default is False.

    Returns
    -------
    corr_fig : matplotlib.pyplot.figure
        Figure object containing the Seaborn heatmap.
    """
    half_mask = None
    if diagonal:
        if triangle == "lower":
            # Hide the upper-right half so only the lower triangle is drawn
            half_mask = np.triu(correlation_matrix_df)
        elif triangle == "upper":
            # Hide the lower-left half so only the upper triangle is drawn
            half_mask = np.tril(correlation_matrix_df)
        else:
            raise ValueError("If diagonal is True, triangle must be either 'lower' or 'upper'.")

    # Fall back to a generic title when none was supplied
    if not title:
        title = "Correlation Heatmap"

    # Figure/axes pair that will hold the heatmap
    fig, heat_ax = plt.subplots()

    # Draw the heatmap with a fixed [-1, 1] color range
    heat_ax = sns.heatmap(correlation_matrix_df,
                          mask=half_mask,
                          cmap=cmap,
                          vmin=-1.0,
                          vmax=1.0)

    if not show_tick_labels:
        # Suppress the individual feature names on both axes
        heat_ax.set_xticklabels(labels=[])
        heat_ax.set_yticklabels(labels=[])

    heat_ax.set_xlabel(xlabel)
    heat_ax.set_ylabel(ylabel)

    # suptitle (the super title) sits above the subtitle set via plt.title
    plt.title(subtitle, fontsize=12)
    plt.suptitle(title, fontsize=14)

    return fig
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
from pandas import DataFrame | ||
from typing import Optional | ||
|
||
def dropUpToFeature(dataframe:DataFrame,
                    feature_name:str,
                    keep_feature_name_column:Optional[bool] = False
                    ):
    """ Drop all columns up to (and optionally including) the specified feature.

    Parameters
    ----------
    dataframe : DataFrame
        Dataframe to drop columns from.
    feature_name : str
        Name of the feature to drop up to.
    keep_feature_name_column : bool, optional
        Whether to keep the specified feature name column in the dataframe or drop it.
        The default is False.

    Returns
    -------
    dataframe : DataFrame
        Dataframe with the leading columns removed. If feature_name is not a
        column, the original dataframe is returned unchanged.
    """
    try:
        # get_loc raises KeyError if the feature is not a column; keep the try
        # body minimal so unrelated errors propagate instead of being swallowed
        feature_position = dataframe.columns.get_loc(feature_name)
    except KeyError:
        print(f"Feature {feature_name} was not found as a column in dataframe. No columns dropped.")
        return dataframe

    # Keep the feature's own column when requested, otherwise drop it too
    end = feature_position if keep_feature_name_column else feature_position + 1
    columns_to_drop = dataframe.columns.to_list()[:end]

    # Drop all columns up to (and possibly including) the specified feature
    return dataframe.drop(columns=columns_to_drop)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
import os | ||
import pandas as pd | ||
|
||
from typing import Optional, Dict | ||
|
||
from readii.io.loaders.general import loadFileToDataFrame | ||
|
||
|
||
def loadFeatureFilesFromImageTypes(extracted_feature_dir:str,
                                   image_types:Optional[list]=None,
                                   drop_labels:Optional[bool]=True,
                                   labels_to_drop:Optional[list]=None)->Dict[str,pd.DataFrame]:
    """Load the extracted imaging feature sets from a directory and return them as a dictionary of dataframes.

    Parameters
    ----------
    extracted_feature_dir : str
        Path to the directory containing the extracted feature csv files
    image_types : list, optional
        List of image types to load in. Defaults to ['original'].
    drop_labels : bool, optional
        Whether to drop the labels from the dataframes. Use when loading labelled data
        from data_setup_for_modeling.ipynb. The default is True.
    labels_to_drop : list, optional
        List of labels to drop from the dataframes. Defaults to
        ["patient_ID","survival_time_in_years","survival_event_binary"] based on code
        in data_setup_for_modeling.ipynb.

    Returns
    -------
    feature_sets : dict
        Dictionary of dataframes containing the extracted radiomics features,
        keyed by image type.
    """
    # Resolve defaults at call time to avoid shared mutable default arguments
    if image_types is None:
        image_types = ['original']
    if labels_to_drop is None:
        labels_to_drop = ["patient_ID", "survival_time_in_years", "survival_event_binary"]

    # Dictionary to collect one dataframe per image type
    feature_sets = {}

    feature_file_list = os.listdir(extracted_feature_dir)

    for image_type in image_types:
        try:
            # Expect exactly one csv per image type, so take the first match
            image_type_feature_file = [file for file in feature_file_list
                                       if (image_type in file) and (file.endswith(".csv"))][0]
            # Remove the matched file so later image types cannot claim it again
            feature_file_list.remove(image_type_feature_file)
        except IndexError:
            print(f"No {image_type} feature csv files found in {extracted_feature_dir}")
            # Skip to the next image type
            continue

        # Get the full path to the feature file
        feature_file_path = os.path.join(extracted_feature_dir, image_type_feature_file)

        # Load the feature data into a pandas dataframe
        raw_feature_data = loadFileToDataFrame(feature_file_path)

        if drop_labels:
            try:
                # Keep only the extracted features by removing the label columns;
                # reassign instead of inplace to avoid mutating shared state
                raw_feature_data = raw_feature_data.drop(labels_to_drop, axis=1)
            except KeyError:
                print(f"{feature_file_path} does not have the labels {labels_to_drop} to drop.")
                # Skip to the next image type
                continue

        # Save the dataframe to the feature_sets dictionary
        feature_sets[image_type] = raw_feature_data

    return feature_sets
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
@@ -0,0 +1,70 @@ | ||||||||
import os | ||||||||
import pandas as pd | ||||||||
import yaml | ||||||||
|
||||||||
from typing import Optional, Dict, Union | ||||||||
|
||||||||
|
||||||||
def loadImageDatasetConfig(dataset_name:str,
                           config_dir_path:str) -> dict:
    """Load the configuration file for a given dataset. Expects the configuration file to be named <dataset_name>.yaml.

    Parameters
    ----------
    dataset_name : str
        Name of the dataset to load the configuration file for.
    config_dir_path : str
        Path to the directory containing the configuration files.

    Returns
    -------
    dict
        Dictionary containing the configuration settings for the dataset, or
        None if the configuration file does not exist.

    Examples
    --------
    >>> config = loadImageDatasetConfig("NSCLC_Radiogenomics", "config/")
    """
    # Make full path to config file
    config_file_path = os.path.join(config_dir_path, f"{dataset_name}.yaml")

    # Guard clause: bail out early when the config file is missing
    if not os.path.exists(config_file_path):
        print(f"Config file {config_file_path} does not exist.")
        return None

    # Context manager guarantees the file handle is closed even if parsing fails
    with open(config_file_path, "r") as config_file:
        return yaml.safe_load(config_file)
|
||||||||
|
||||||||
|
||||||||
def loadFileToDataFrame(file_path:str) -> pd.DataFrame:
    """Load data from a csv or xlsx file into a pandas dataframe.

    Parameters
    ----------
    file_path : str
        Path to the data file.

    Returns
    -------
    pd.DataFrame or None
        Dataframe containing the data from the file, or None if the file has
        an unsupported extension or could not be read.
    """
    # Get the file extension
    _, file_extension = os.path.splitext(file_path)

    # Dispatch table mapping each supported extension to its pandas reader
    readers = {'.xlsx': pd.read_excel, '.csv': pd.read_csv}

    reader = readers.get(file_extension)
    if reader is None:
        # Unsupported format: report and return None rather than raising an
        # exception only to catch it ourselves (matches previous behavior)
        print("An error occurred: Unsupported file format. Please provide a .csv or .xlsx file.")
        return None

    try:
        return reader(file_path)
    except (OSError, ValueError) as e:
        # Only file-access and parsing problems are expected here (ParserError
        # subclasses ValueError, FileNotFoundError subclasses OSError);
        # anything else propagates so real bugs are not silently swallowed
        print(f"An error occurred: {e}")
        return None
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🛠️ Refactor suggestion
Consider removing general
except
clause or provide specific handlingCatching a general exception and returning
None
can obscure unexpected errors and make debugging difficult.Apply this diff to remove the general
except
clause or handle specific exceptions:Alternatively, specify the exceptions you expect: