From 57b3d33bcc91434e62281d96252749956eded350 Mon Sep 17 00:00:00 2001 From: leandrolecca Date: Fri, 25 Nov 2022 11:17:59 +0100 Subject: [PATCH 01/12] python vs matlab --- comparison.py | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 comparison.py diff --git a/comparison.py b/comparison.py new file mode 100644 index 0000000..d44c0e4 --- /dev/null +++ b/comparison.py @@ -0,0 +1,66 @@ +import pandas as pd +import os +import json +#import subprocess + +repoid="ds000258" +#repourl="git@github.com:OpenNeuroDatasets/" +#wdir="/bcbl/home/home_g-m/llecca/Scripts/gift/dev" +wdir=os.getcwd() + +#subprocess.run(["cd",wdir]) +#os.system(["cd", wdir]) + +os.system("datalad install git@github.com:OpenNeuroDatasets/"+repoid+".git") +repo=os.path.join(wdir,repoid) + +subjects=[] + +for file in os.listdir(repo): + d = os.path.join(repo,file) + if os.path.isdir(d) and "sub-" in d: + subjects.append(os.path.basename(d)) + +json_files=[] +echo_times=[] + +for sbj in subjects: + for file in os.listdir(os.path.join(repo,sbj,'func')): + if file.endswith('.json'): + f=open(os.path.join(repo,sbj,'func',file)) + data=json.load(f) + echo_times.append(data["EchoTime"]) + json_files.append(file) + f.close() + +#df_echo=pd.DataFrame(list(zip(json_files,echo_times)),columns=['file','echo time']) + +df_echo = pd.DataFrame({'file': json_files, 'echo time': echo_times}) + +#write.csv(df_echo,os.path.join(wdir,'echo_times_'+repoid+'.csv'),row.names=FALSE) +df_echo.to_csv(os.path.join(wdir,'echo_times_'+repoid+'.csv'),index=False) + + +# here run matlab script + + + + + + + +#df=pd.read_csv(os.path.join(wdir,'participants.tsv'),sep='\t') +#subjects=df['participant_id'] + +#for sbj in subjects: +# print(os.path.join(sbj,'func')) +# for jsonfile in os.listdir(os.path.join(wdir,sbj,'func')): +# if jsonfile.endswith('.json'): +# f=open(jsonfile) +# data=json.load(f) +# print(data['EchoTime']) +# f.close() + + + #datalad get os.path.join(sbj,'func') + #datalad drop os.path.join(sbj,'func') From 589a4bea392ccfaf07f048578d3cdbcba18aae41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eneko=20Uru=C3=B1uela?= Date: Fri, 25 Nov 2022 12:29:16 +0100 Subject: [PATCH 02/12] Made a working version of the Python side of things TODO: Call MATLAB function from the Python script. --- comparison.py | 146 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 103 insertions(+), 43 deletions(-) diff --git a/comparison.py b/comparison.py index d44c0e4..8d62974 100644 --- a/comparison.py +++ b/comparison.py @@ -1,66 +1,126 @@ -import pandas as pd -import os import json -#import subprocess +import os +import subprocess + +import nibabel as nib +import numpy as np +import pandas as pd +from scipy import io as scipy_io +from tedana.workflows import tedana as tedana_cli + +from mapca import MovingAveragePCA + +repoid = "ds000258" +wdir = "/Users/eurunuela/Downloads" + +repo = os.path.join(wdir, repoid) -repoid="ds000258" -#repourl="git@github.com:OpenNeuroDatasets/" -#wdir="/bcbl/home/home_g-m/llecca/Scripts/gift/dev" -wdir=os.getcwd() +subprocess.run( + f"cd {wdir} && datalad install git@github.com:OpenNeuroDatasets/{repoid}.git", shell=True +) + +subjects = [] + +# Create pandas dataframe to store results with the following columns: +# Subject, maPCA_AIC, GIFT_AIC, maPCA_KIC, GIFT_KIC, maPCA_MDL, GIFT_MDL +results_df = pd.DataFrame( + columns=["Subject", "maPCA_AIC", "GIFT_AIC", "maPCA_KIC", "GIFT_KIC", "maPCA_MDL", "GIFT_MDL"] +) -#subprocess.run(["cd",wdir]) -#os.system(["cd", wdir]) -os.system("datalad install git@github.com:OpenNeuroDatasets/"+repoid+".git") -repo=os.path.join(wdir,repoid) +for sbj in os.listdir(repo): + sbj_dir = os.path.join(repo, sbj) -subjects=[] + # Access subject directory + if os.path.isdir(sbj_dir) and "sub-" in sbj_dir: + echo_times = [] + func_files = [] -for file in os.listdir(repo): - d = os.path.join(repo,file) - if os.path.isdir(d) and "sub-" in d: - subjects.append(os.path.basename(d)) + subjects.append(os.path.basename(sbj_dir)) -json_files=[] -echo_times=[] + print("Downloading subject", sbj) -for sbj in subjects: - for file in os.listdir(os.path.join(repo,sbj,'func')): - if file.endswith('.json'): - f=open(os.path.join(repo,sbj,'func',file)) - data=json.load(f) - echo_times.append(data["EchoTime"]) - json_files.append(file) - f.close() + subprocess.run(f"datalad get {sbj}/func", shell=True, cwd=repo) -#df_echo=pd.DataFrame(list(zip(json_files,echo_times)),columns=['file','echo time']) + print("Searching for functional files and echo times") -df_echo = pd.DataFrame({'file': json_files, 'echo time': echo_times}) + # Get functional filenames and echo times + for func_file in os.listdir(os.path.join(repo, sbj, "func")): + if func_file.endswith(".json"): + with open(os.path.join(repo, sbj, "func", func_file)) as f: + data = json.load(f) + echo_times.append(data["EchoTime"]) + elif func_file.endswith(".nii.gz"): + func_files.append(os.path.join(repo, sbj, "func", func_file)) -#write.csv(df_echo,os.path.join(wdir,'echo_times_'+repoid+'.csv'),row.names=FALSE) -df_echo.to_csv(os.path.join(wdir,'echo_times_'+repoid+'.csv'),index=False) + # Sort echo_times values from lowest to highest and multiply by 1000 + echo_times = np.array(sorted(echo_times)) * 1000 + # Sort func_files + func_files = sorted(func_files) -# here run matlab script + # Tedana output directory + tedana_output_dir = os.path.join(sbj_dir, "tedana") + print("Running tedana") + # Run tedana + tedana_cli.tedana_workflow( + data=func_files, + tes=echo_times, + out_dir=tedana_output_dir, + tedpca="mdl", + ) + # Find tedana optimally combined data and mask + tedana_optcom = os.path.join(tedana_output_dir, "desc-optcom_bold.nii.gz") + tedana_mask = os.path.join(tedana_output_dir, "desc-adaptiveGoodSignal_mask.nii.gz") + # Read tedana optimally combined data and mask + tedana_optcom_img = nib.load(tedana_optcom) + tedana_mask_img = nib.load(tedana_mask) + # Save tedana optimally combined data and mask into mat files + tedana_optcom_mat = os.path.join(sbj_dir, "optcom_bold.mat") + tedana_mask_mat = os.path.join(sbj_dir, "mask.mat") + print("Saving tedana optimally combined data and mask into mat files") + scipy_io.savemat(tedana_optcom_mat, {"data": tedana_optcom_img.get_fdata()}) + scipy_io.savemat(tedana_mask_mat, {"mask": tedana_mask_img.get_fdata()}) + # Run mapca + print("Running mapca") + pca = MovingAveragePCA(normalize=True) + _ = pca.fit_transform(tedana_optcom_img, tedana_mask_img) -#df=pd.read_csv(os.path.join(wdir,'participants.tsv'),sep='\t') -#subjects=df['participant_id'] + # Get AIC, KIC and MDL values + aic = pca.aic_ + kic = pca.kic_ + mdl = pca.mdl_ -#for sbj in subjects: -# print(os.path.join(sbj,'func')) -# for jsonfile in os.listdir(os.path.join(wdir,sbj,'func')): -# if jsonfile.endswith('.json'): -# f=open(jsonfile) -# data=json.load(f) -# print(data['EchoTime']) -# f.close() + # Remove tedana output directory and the anat and func directories + subprocess.run(f"rm -rf {tedana_output_dir}", shell=True, cwd=repo) + subprocess.run(f"rm -rf {sbj}/anat", shell=True, cwd=repo) + subprocess.run(f"rm -rf {sbj}/func", shell=True, cwd=repo) + # Here run matlab script with subprocess.run + print("Running GIFT version of maPCA") - #datalad get os.path.join(sbj,'func') - #datalad drop os.path.join(sbj,'func') + # Append AIC, KIC and MDL values to a pandas dataframe + print("Appending AIC, KIC and MDL values to a pandas dataframe") + results_df = results_df.append( + { + "Subject": sbj, + "maPCA_AIC": aic, + "GIFT_AIC": 0, + "maPCA_KIC": kic, + "GIFT_KIC": 0, + "maPCA_MDL": mdl, + "GIFT_MDL": 0, + }, + ignore_index=True, + ) + + print("Subject", sbj, "done") + +# Save pandas dataframe to csv file +results_df.to_csv(os.path.join(wdir, "results.csv"), index=False) From 5405041d8ec941440afba39ac3d5efa9dd12c893 Mon Sep 17 00:00:00 2001 From: leandrolecca Date: Mon, 28 Nov 2022 18:40:43 +0100 Subject: [PATCH 03/12] edited icatb_estimate_dimension --- icatb_estimate_dimension.m | 1005 ++++++++++++++++++++++++++++++++++++ 1 file changed, 1005 insertions(+) create mode 100644 icatb_estimate_dimension.m diff --git a/icatb_estimate_dimension.m b/icatb_estimate_dimension.m new file mode 100644 index 0000000..a206609 --- /dev/null +++ b/icatb_estimate_dimension.m @@ -0,0 +1,1005 @@ +function [comp_est_AIC, comp_est_KIC, comp_est_MDL, mdl, aic, kic] = icatb_estimate_dimension(files, maskvec, precisionType, dim_est_opts) +% function [comp_est, mdl, aic, kic] = icatb_estimate_dimension(files, maskvec) +% Select the order of the multivariate data using Information theoretic +% criteria with the option of sample dependence correction; +% Inputs: +% +% 1. files - fullfile path of data files as a character array or +% numeric array of size xdim by ydim by zdim by tdim. +% 2. maskvec - Mask file in analyze or Nifti format. Mask must have the +% same dimensions as the data or mask vector must be in logical vector format of +% of length xdim*ydim*zdim. +% 3. precisionType - Load data as double or single +% 4. dim_est_opts - Dimensionality options. Options to select method and if +% you specified method = 2 set fwhm variable as well. +% +% Output: +% 1. comp_est_AIC: estimated order using AIC +% 2. comp_est_KIC: estimated order using KIC +% 3. comp_est_MDL: estimated order using MDL +% 4. mdl - MDL vector +% 5. aic - AIC vector +% 6. kic - KIC vector + +% Please cite the following paper if you use this code for publication +% Y.-O. Li, T. Adali and V. D. Calhoun, "Estimating the number of independent +% components for fMRI data," Human Brain Mapping, vol. 28, pp. 1251--66, 2007 + + +%COPYRIGHT NOTICE +%This function is a part of GIFT software library +%Copyright (C) 2003-2009 +% +%This program is free software; you can redistribute it and/or +%modify it under the terms of the GNU General Public License +%as published by the Free Software Foundation; either version 2 +%of the License, or any later version. +% +%This program is distributed in the hope that it will be useful, +%but WITHOUT ANY WARRANTY; without even the implied warranty of +%MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +%GNU General Public License for more details. +% +%You should have received a copy of the GNU General Public License +%along with this program; if not, write to the Free Software +%Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +icatb_defaults; +global DIM_ESTIMATION_OPTS; + +if (~exist('files', 'var')) + files = icatb_selectEntry('typeSelection', 'multiple', 'filter', '*.img', 'title', 'Select functional data files'); +end + +drawnow; + +if (~exist('maskvec', 'var')) + maskvec = []; +end + +if (~exist('precisionType', 'var')) + precisionType = 'double'; +end + +dim_est_method = 1; +fwhm = []; + +if (~exist('dim_est_opts', 'var') || isempty(dim_est_opts)) + dim_est_opts = DIM_ESTIMATION_OPTS; +end + +try + dim_est_method = dim_est_opts.method; +catch +end + + +% try +% dim_est_method = DIM_ESTIMATION_OPTS.method; +% catch +% if (isfield(DIM_ESTIMATION_OPTS, 'iid_sampling')) +% if (DIM_ESTIMATION_OPTS.iid_sampling == 1) +% % iid sampling on +% dim_est_method = 1; +% else +% % iid sampling off +% dim_est_method = 2; +% end +% end +% end + +try + fwhm = dim_est_opts.fwhm; +catch +end + +if (dim_est_method == 2) + if (isempty(fwhm)) + error('Please set fwhm smoothness factor using DIM_ESTIMATION_OPTS.fwhm when DIM_ESTIMATION_OPTS.method = 2'); + end +end + +if (ischar(maskvec) && endsWith(maskvec,'.mat')) + mask = load(maskvec); + maskvec = mask.data; +else + % If mask is supplied as a file, read data and convert data to + % logical + maskvec = icatb_rename_4d_file(maskvec); + % Load only the first file + maskvec = deblank(maskvec(1,:)); + maskvec = icatb_loadData(maskvec); + + maskvec = icatb_loadData(deblank(maskvec(1, :))); +end + +%% Get dimensions of data by loading first time point +if (ischar(files) && ~endsWith(files,'.mat')) + files = icatb_rename_4d_file(files); + first_file = deblank(files(1, :)); + data = icatb_loadData(first_file); + xdim = size(data, 1); ydim = size(data, 2); zdim = size(data, 3); tdim = size(files, 1); + first_file = icatb_parseExtn(first_file); + if (strcmpi(first_file(end-3:end), '.mat')) + tdim = size(data, 4); + end +else + if (ischar(files) && endsWith(files,'.mat')) + data = load(files); + data = data.data; + else + data = files; + end + + if (length(size(data)) == 4) + xdim = size(data, 1); ydim = size(data, 2); zdim = size(data, 3); tdim = size(data, 4); + else + if (length(size(maskvec)) == 3) + xdim = size(maskvec, 1); ydim = size(maskvec, 2); zdim = size(maskvec, 3); tdim = size(data, 2); + if (size(data, 1) ~= xdim*ydim*zdim) + error('Mask dimensions (x, y, z) don''t match data dimensions (x, y, z)'); + end + else + error('Cannot get the x,y,z dimension information from the mask or the data. Data must be 4D array or mask must be 3D array'); + end + end + +end + +%% Load mask and check the dimensions of the data +if (isempty(maskvec)) + + if (length(size(data)) == 4) + tmp = data(:, :, :, 1); + elseif ((length(size(data)) == 3) && (numel(data) == xdim*ydim*zdim)) + tmp = data(:); + elseif ((length(size(data)) == 2) && (numel(data) == xdim*ydim*zdim)) + tmp = data(:); + else + tmp = data(:, 1); + end + + + maskvec = (tmp(:) >= abs(mean(tmp(:)))); + +else + + % Get mask vector + maskvec = (maskvec ~= 0); + maskvec = maskvec(:); + +end + +if (length(maskvec) ~= xdim*ydim*zdim) + error('Error:MaskDIM', 'Mask dimensions (%d) doesn''t match the data (%d)', length(maskvec), xdim*ydim*zdim); +end + +%% Load data by reading timepoint at a time +if (ischar(files) && ~endsWith(files,'.mat')) + data = icatb_read_data(files, [], find(maskvec == 1), precisionType); +else + if (strcmpi(precisionType, 'single')) + data = single(data); + end + data = reshape(data, xdim*ydim*zdim, tdim); + data = data(maskvec, :); +end + +verbose = 1; + +mdl = []; +aic = []; +kic = []; + +if ((dim_est_method == 3) || (dim_est_method == 4)) + % Use ER-FM or ER-AR + + if (verbose) + disp('Using entropy rate estimator ...'); + end + + [ER_FM, ER_AR] = orderEstimation_HdsvsEig_R(data'); + + if (dim_est_method == 3) + disp('Order estimated by entropy rate based method assumes signal has finite memory length ...'); + comp_est = ER_FM; + else + disp('Order estimated by entropy rate based method assumes AR signal ...'); + comp_est = ER_AR; + end + + return; + +end + + +if (dim_est_method == 1) + + %% Perform variance normalization + if verbose + fprintf('\n Performing variance normalization ...'); + end + + for n = 1:size(data, 2) + data(:, n) = detrend(data(:, n), 0) ./ std(data(:, n)); + end + + %% (completed) + + fprintf('\n P1:'); + [V, EigenValues] = icatb_svd(data, tdim, 'verbose', 0); + EigenValues = diag(EigenValues); + EigenValues = EigenValues(end:-1:1); + dataN = data*V(:, end:-1:1); + clear V; + + %% Select Gaussian principal components for effectively i.i.d. sample estimation + if verbose + fprintf('\n Selecting Gaussian principal components ...'); + end + + %% Using 12 gaussian components from middle, top and bottom gaussian components to determine the subsampling depth. Final subsampling depth is determined using median + kurtv1 = kurtn(dataN); + kurtv1(EigenValues > mean(EigenValues)) = 1000; + idx_gauss = find(((kurtv1(:) < 0.3) .* (EigenValues > eps)) == 1); + idx = idx_gauss(:)'; + dfs = length(find(EigenValues > eps)); % degrees of freedom + minTp = 12; + + if (length(idx) >= minTp) + middle = round(length(idx)/2) + 1; + idx = [idx(1:4), idx(middle - 1:middle + 2), idx(end-3:end)]; + elseif isempty(idx) + minTp = min([minTp, dfs]); + idx = (dfs - minTp + 1:dfs); + end + + idx = unique(idx); + + %% Estimate the subsampling depth for effectively i.i.d. samples + if verbose + fprintf('\n\n Estimate effectively i.i.d. samples: '); + end + + mask_ND = reshape(maskvec, xdim, ydim, zdim); + ms = length(idx); + s = zeros(ms,1); + for i = 1:ms + x_single = zeros(xdim*ydim*zdim, 1); + x_single(maskvec) = dataN(:, idx(i)); + x_single = reshape(x_single, xdim, ydim, zdim); + s(i) = est_indp_sp(x_single); + if (i > 6) + tmpS = s(1:i); + tmpSMedian = round(median(tmpS)); + if (length(find(tmpS == tmpSMedian)) > 6) + s = tmpS; + break; + end + end + dim_n = prod(size(size(x_single))); + clear x_single; + end + clear dataN; + fprintf('\n'); + s1 = round(median(s)); + if floor((sum(maskvec)/(1*tdim))^(1/dim_n)) < s1 + s1 = floor((sum(maskvec)/(1*tdim))^(1/dim_n)); + end + N = round(sum(maskvec)/s1^dim_n); + %% (completed) + + %% Use the subsampled dataset to calculate eigen values + if verbose + fprintf('\n Perform EVD on the effectively i.i.d. samples ...'); + end + if s1 ~= 1 + mask_s = subsampling(mask_ND, s1, [1,1,1]); + mask_s_1d = reshape(mask_s, 1, prod(size(mask_s))); + dat = zeros(length(find(mask_s_1d == 1)), tdim); + for i = 1:tdim + x_single = zeros(xdim*ydim*zdim, 1); + x_single(maskvec) = data(:, i); + x_single = reshape(x_single, xdim, ydim, zdim); + dat0 = subsampling(x_single, s1, [1, 1, 1]); + dat0 = reshape(dat0, prod(size(dat0)), 1); + dat(:, i) = dat0(mask_s_1d); + clear x_single; + end + clear data; + %% Perform variance normalization + for n = 1:size(dat, 2) + dat(:, n) = detrend(dat(:, n), 0) ./ std(dat(:, n)); + end + %% (completed) + fprintf('\n P2:'); + [V, EigenValues] = icatb_svd(dat, tdim, 'verbose', 0); + EigenValues = diag(EigenValues); + EigenValues = EigenValues(end:-1:1); + %lam = EigenValues; + end + + lam = EigenValues; + clear dat; + + if verbose + fprintf('\n Effective number of i.i.d. samples: %d \n',N); + end + + %% Make eigen spectrum adjustment + if verbose + fprintf('\n Perform eigen spectrum adjustment ...'); + end + lam = eigensp_adj(lam, N, length(lam)); + %% (completed) + + if sum(imag(lam)) + error('Invalid eigen value found for the subsampled data.'); + end + + +else + %% No iid sampling + if verbose + fprintf('\n computing eigen values ...\n'); + end + + [V, EigenValues] = icatb_svd(data, tdim, 'verbose', 0); + EigenValues = diag(EigenValues); + EigenValues = EigenValues(end:-1:1); + lam = EigenValues; + N = ceil(size(data, 1) / prod(fwhm)); + +end + +%% Correction on the ill-conditioned results (when tdim is large, some least significant eigenvalues become small negative numbers) +lam(real(lam) <= eps) = min(lam(real(lam)>= eps)); + +if verbose + fprintf('\n Estimating the dimension ...'); +end +p = tdim; +aic = zeros(1, p - 1); +kic = zeros(1, p - 1); +mdl = zeros(1, p - 1); +for k = 1:p-1 + LH = log(prod( lam(k+1:end).^(1/(p-k)) )/mean(lam(k+1:end))); + mlh = 0.5*N*(p-k)*LH; + df = 1 + 0.5*k*(2*p-k+1); + aic(k) = -2*mlh + 2*df; + kic(k) = -2*mlh + 3*df; + mdl(k) = -mlh + 0.5*df*log(N); +end + +% Find the first local minimum of each ITC +itc = zeros(3, length(mdl)); +itc(1,:) = aic; +itc(2,:) = kic; +itc(3,:) = mdl; + +%% Estimated components using MDL +dlap = squeeze(itc(3, 2:end) - itc(3, 1:end-1)); +a3 = find(dlap > 0); + +dlap2 = squeeze(itc(2, 2:end) - itc(2, 1:end-1)); +a2 = find(dlap2>0); + +dlap1 = squeeze(itc(1, 2:end) - itc(1, 1:end-1)); +a1 = find(dlap1>0); + + +if isempty(a3) + comp_est_MDL = length(squeeze(itc(3, :))); +else + comp_est_MDL = a3(1); +end + +if isempty(a1) + comp_est_AIC = length(squeeze(itc(1, :))); +else + comp_est_AIC = a1(1); +end + +if isempty(a2) + comp_est_KIC = length(squeeze(itc(2, :))); +else + comp_est_KIC = a2(1); +end + +fprintf('\n'); +disp(['Estimated components with mdl is found out to be ', num2str(comp_est_MDL)]); + +fprintf('\n'); +disp(['Estimated components with aic is found out to be ', num2str(comp_est_AIC)]); + +fprintf('\n'); +disp(['Estimated components with kic is found out to be ', num2str(comp_est_KIC)]); +%fprintf('\n'); + +function out = subsampling(x,s,x0) +% Subsampling the data evenly with space 's' + +n = size(x); + +if max(size(n)) == 2 & min(n) == 1 % 1D + out = x([x0(1):s:max(n)]); + +elseif max(size(n)) == 2 & min(n) ~= 1 % 2D + out = x([x0(1):s:n(1)],[x0(2):s:n(2)]); + +elseif max(size(n)) == 3 & min(n) ~= 1 % 3D + out = x([x0(1):s:n(1)],[x0(2):s:n(2)],[x0(3):s:n(3)]); + +else + error('Unrecognized matrix dimension!(subsampling)'); + +end + + +function [s, entrate_m] = est_indp_sp(x) +% estimate the effective number of independent samples based on the maximum +% entropy rate principle of stationary random process + + +dimv = size(x); +s0 = 0; +fprintf('\n'); +for j = 1:min(dimv)-1 + x_sb = subsampling(x,j,[1,1,1]); + if j == 1 + fprintf('\n Estimating the entropy rate of the Gaussian component with subsampling depth %d,',j); + else + fprintf(' %d,',j); + end + entrate_m = entrate_sp(x_sb,1); + + ent_ref = 1.41; + if entrate_m > ent_ref + s0 = j; break; + end +end +fprintf(' Done;'); +if s0 == 0 + error('Ill conditioned data, can not estimate independent samples.(est_indp_sp)'); +else + s = s0; +end + + +function out = entrate_sp(x, sm_window) +% Calculate the entropy rate of a stationary Gaussian random process using +% spectrum estimation with smoothing window + +n = size(x); + +% Normalize x_sb to be unit variance +x_std = std(reshape(x,prod(n),1)); +if x_std < 1e-10; x_std = 1e-10; end; +x = x/x_std; + +if( sm_window == 1) + + M = ceil(n/10); + + if(max(size(n) >= 3)) + parzen_w_3 = zeros(2*n(3)-1,1); + parzen_w_3(n(3)-M(3):n(3)+M(3)) = parzen_win(2*M(3)+1); + end + + if(max(size(n) >= 2)) + parzen_w_2 = zeros(2*n(2)-1,1); + parzen_w_2(n(2)-M(2):n(2)+M(2)) = parzen_win(2*M(2)+1); + end + + if(max(size(n) >= 1)) + parzen_w_1 = zeros(2*n(1)-1,1); + parzen_w_1(n(1)-M(1):n(1)+M(1)) = parzen_win(2*M(1)+1); + end + +end + +if max(size(n)) == 2 & min(n) == 1 % 1D + xc = xcorr(x,'unbiased'); + xc = xc.*parzen_w; + xf = fftshift(fft(xc)); + +elseif max(size(n)) == 2 & min(n) ~= 1 % 2D + xc = xcorr2(x); % default option: computes raw correlations with NO + % normalization -- Matlab help on xcorr + + % Bias correction + v1 = [1:n(1),n(1)-1:-1:1]; + v2 = [1:n(2),n(2)-1:-1:1]; + + vd = v1'*v2; + xc = xc./vd; + parzen_window_2D = parzen_w_1*parzen_w_2'; + xc = xc.*parzen_window_2D; + xf = fftshift(fft2(xc)); + +elseif max(size(n)) == 3 & min(n) ~= 1 % 3D + xc = zeros(2*n-1); + for m3 = 0:n(3)-1 + temp = zeros(2*n(1:2)-1); + for k = 1:n(3)-m3 + temp = temp + xcorr2(x(:,:,k+m3),x(:,:,k)); % default option: + % computes raw correlations with NO normalization + % -- Matlab help on xcorr + end + xc(:,:,n(3)-m3) = temp; + xc(:,:,n(3)+m3) = temp; + end + + % Bias correction + v1 = [1:n(1),n(1)-1:-1:1]; + v2 = [1:n(2),n(2)-1:-1:1]; + v3 = [n(3):-1:1]; + + vd = v1'*v2; + vcu = zeros(2*n-1); + for m3 = 0:n(3)-1 + vcu(:,:,n(3)-m3) = vd*v3(m3+1); + vcu(:,:,n(3)+m3) = vd*v3(m3+1); + end + + xc = xc./vcu; + + parzen_window_2D = parzen_w_1*parzen_w_2'; + for m3 = 0:n(3)-1 + parzen_window_3D(:,:,n(3)-m3) = parzen_window_2D*parzen_w_3(n(3)-m3); + parzen_window_3D(:,:,n(3)+m3) = parzen_window_2D*parzen_w_3(n(3)+m3); + end + xc = xc.*parzen_window_3D; + + xf = fftshift(fftn(xc)); + +else + error('Unrecognized matrix dimension.'); + +end + +xf = abs(xf); +xf(xf<1e-4) = 1e-4; +out = 0.5*log(2*pi*exp(1)) + sumN(log(abs((xf))))/2/sumN(abs(xf)); + + +function w = parzen_win(n) +% PARZENWIN Parzen window. +% PARZENWIN(N) returns the N-point Parzen (de la Valle-Poussin) window in +% a column vector. + +% Check for valid window length (i.e., n < 0) +[n,w,trivialwin] = checkOrder(n); +if trivialwin, return, end; + +% Index vectors +k = -(n-1)/2:(n-1)/2; +k1 = k(k<-(n-1)/4); +k2 = k(abs(k)<=(n-1)/4); + +% Equation 37 of [1]: window defined in three sections +w1 = 2 * (1-abs(k1)/(n/2)).^3; +w2 = 1 - 6*(abs(k2)/(n/2)).^2 + 6*(abs(k2)/(n/2)).^3; +w = [w1 w2 w1(end:-1:1)]'; + + +function [n_out, w, trivalwin] = checkOrder(n_in) +% CHECKORDER Checks the order passed to the window functions. + +w = []; +trivalwin = 0; + +% Special case of negative orders: +if n_in < 0, + error('Order cannot be less than zero.'); +end + +% Check if order is already an integer or empty +% If not, round to nearest integer. +if isempty(n_in) | n_in == floor(n_in), + n_out = n_in; +else + n_out = round(n_in); + warning('Rounding order to nearest integer.'); +end + +% Special cases: +if isempty(n_out) | n_out == 0, + w = zeros(0,1); % Empty matrix: 0-by-1 + trivalwin = 1; +elseif n_out == 1, + w = 1; + trivalwin = 1; +end + + +function lam_adj = eigensp_adj(lam,n,p) +% Eigen spectrum adjustment for EVD on finite samples + +r = p/n; + +bp = (1+sqrt(r))^2; +bm = (1-sqrt(r))^2; + +vv = [bm:(bp-bm)/(5*p-1):bp]; + +gv = 1./(2*pi*r*vv).*sqrt((vv-bm).*(bp-vv)); + +gvd = zeros(size(gv)); +for i = 1:length(gv) + gvd(i) = sum(gv(1:i)); +end +gvd = gvd/max(gvd); + +lam_emp = zeros(size(lam)); +for i = 1:p + i_norm = i/p; + [minv,minx] = min(abs(i_norm-gvd)); + lam_emp(i) = vv(minx); +end + +lam_emp = rot90(lam_emp, 2); + +lam_adj = lam./lam_emp; + + +function [sum_dat] = sumN(dat) +% sum of the all elements of the dat matrix + +sum_dat = sum(dat(:)); + + +function c = xcorr2(a,b) +% two dimensional cross correlation + +if nargin == 1 + b = a; +end + +c = conv2(a, rot90(conj(b),2)); + +function kurt = kurtn(x) +% Normalized kurtosis funtion so that for a Gaussian r.v. the kurtn(g) = 0 +% Input: x 1:N vector + +kurt = zeros(1, size(x, 2)); + +for i = 1:size(x, 2) + a = detrend(x(:, i), 0); + a = a/std(a); + kurt(i) = mean(a.^4) - 3; +end + + + +function [ER_FM, ER_AR] = orderEstimation_HdsvsEig_R( data1 ) +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%ENTROPY-RATE BASED ORDER SELECTION (ER_FM & ER_AR) +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Input: +% data1: mixtures X +% Outputs: +% ER_FM: Order estimated by entropy rate based method that assumes signal has finite memory length +% ER_AR: Order estimated by entropy rate based method that assumes AR signal + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% +% References: +% +% ER_FM & ER_AR +% G.-S. Fu, M. Anderson, and T. Adali, Likelihood estimators for dependent samples +% and their application to order detection, in Signal Process, IEEE Trans. +% on, vol. 62, no. 16, pp. 4237-4244, Aug. 2014. +% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Program by Geng-Shen Fu +% Please contact me at fugengs1@umbc.edu +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +data1 = data1 - mean(data1,2)*ones(1,size(data1,2)); % remove the mean +[N T]=size(data1); + +% estimate the downsampling depth and order detection +[U1,S1,V1] = svd(data1,'econ'); +clear V1; +data1 = U1'*data1; + +[ACS DSD dsdAll] = Entropy_rate_estimator(data1); + +ER_FM_DS=zeros(1,max(DSD,100)); +for i=1:DSD + for n=1:N + H(n)=Entropy_rate_single_estimator(ACS(n,1:i:DSD)); + end; + if sum(H==Inf)>0 + continue; + end + [H index_H] = sort(H, 'descend'); + ER_FM_DS(i)=order_estimate_entropy(N, T/i, H); +end +ER_FM_DS1=ER_FM_DS(ER_FM_DS~=0); +ER_FM=round(median(ER_FM_DS1)); + +ER_AR_DS=zeros(1,max(DSD,100)); +for i=1:DSD + for n=1:N + H(n)=Entropy_rate_single_estimator_AR(data1(n,1:i:end)); + end; + if sum(H==Inf)>0 + continue; + end + [H index_H] = sort(H, 'descend'); + ER_AR_DS(i)=order_estimate_entropy(N, T/i, H); +end +ER_AR_DS1=ER_AR_DS(ER_AR_DS~=0); +ER_AR=round(median(ER_AR_DS1)); + +maxp0 = floor(2*size(data1,2)/size(data1,1))/2; +dsd = size(data1,1)/2; % the initial downsampling depth is half the number of sensors +existing_orders = []; % save all estimated orders +while 1 + % estimate orders + E_JDS = order_estimate_complex_c(size(data1,1), size(data1,2), dsd, diag(S1)); + existing_orders = [existing_orders; E_JDS]; + if sum(existing_orders==E_JDS)>=5 % return when the order detection converges + dsd = ceil(dsd); + break; + end + + % re-estimate downsampling depth + dsd = 0; + cnt1 = 0; + cnt2 = 0; % counting the MA orders reaching the upper bound + ma_order_list=[]; + for k=1:E_JDS + ma_order1=DownSampOrderEstimationR(data1(k,:)); + ma_order_list=[ma_order_list;ma_order1]; + end + dsd=median(ma_order_list); +end + +function p0 = order_estimate_complex_c( N, T0, dsd, S1 ) +% order estimation of real-valued multivariate process +% N: number of time points +% T0: original sample size +% dsd: downsampling depth +% S1: singular values +% p0: estimated order +% Program by Geng-Shen Fu: fugengs1@umbc.edu +T = T0/dsd; % effective sample size +lambda = S1.^2/T0; % eigenvalues + +DL = zeros(N,1); +for p = 0 : N-1 + + nf = 1+p*N-0.5*p*(p-1); % number of degrees of freedom + DL(p+1) = DL(p+1) + 0.5*nf*log(T)/T; + + for r = 1 : p + DL(p+1) = DL(p+1) + 0.5*log(lambda(r)); + end + + sigma2v = sum(lambda(p+1:N))/(N-p); + DL(p+1) = DL(p+1) + 0.5*(N-p)*log(sigma2v); + +end +[dl0, p0] = min(real(DL)); +p0 = p0-1; +%figure; plot([0:N-1], real(DL),'--o') + + + +function [ACS dsd ma_order_list] = Entropy_rate_estimator( data ) +N = size(data,1); +T = size(data, 2); +ma_order_list=[]; + +for i = 1:N + dsd=DownSampOrderEstimationR(data(i,:)); + ma_order_list = [ma_order_list;dsd]; +end; +% ma_order_list=[40 35 30 25 10 10 10 10 10 10]'; + +dsd=round(mean(ma_order_list)); +dsd_max=max(ma_order_list); + +% ma_order_list(ma_order_listinf + p0=inf; + return; + end + end +end + + +function s = isWhiteR(x) +% hypothesis: white process vs colored process +% if accept white, return 1; otherwise, return 0 +% Program by Xi-Lin Li: lixilin@umbc.edu +T=length(x); + +% DL of white case +p=0; +dl0=log(mean(abs(x).^2)); + +% DL of colored case with AR order 1 +p=1; +x1 = [0,x(1:end-1)]; +a = x*x1'/(x1*x1'); +n = x-a*x1; +dl1 = log(mean(abs(n).^2)) + p*log(T)/T/2; + +s=(dl0 Date: Mon, 28 Nov 2022 18:41:50 +0100 Subject: [PATCH 04/12] added gift-execution --- comparison.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/comparison.py b/comparison.py index 8d62974..9b7eeb9 100644 --- a/comparison.py +++ b/comparison.py @@ -14,6 +14,7 @@ wdir = "/Users/eurunuela/Downloads" repo = os.path.join(wdir, repoid) +gift = "/bcbl/home/home_g-m/llecca/Scripts/gift" subprocess.run( f"cd {wdir} && datalad install git@github.com:OpenNeuroDatasets/{repoid}.git", shell=True @@ -104,6 +105,16 @@ # Here run matlab script with subprocess.run print("Running GIFT version of maPCA") + + cmd = f'matlab -nodesktop -nosplash -nojvm -logfile {sbj_dir}/giftoutput.txt -r "try;addpath(genpath(\'{gift}\'));sprintf(\'Subject dir: %s\',\'{datadir}\');[comp_est_AIC,comp_est_KIC,comp_est_MDL,mdl,aic,kic]=icatb_estimate_dimension(\'{tedana_optcom_mat}\',\'{tedana_mask_mat}\',\'double\',3);save(\'{sbj_dir}/gift.mat\',\'comp_est_AIC\',\'comp_est_KIC\',\'comp_est_MDL\');end;quit"' + + giftmat=scipy_io.loadmat(os.path.join(sbj_dir,'gift.mat')) + + proc=subprocess.Popen(cmd,shell=True,stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + output, err = proc.communicate() + print(output.decode('utf-8')) # Append AIC, KIC and MDL values to a pandas dataframe print("Appending AIC, KIC and MDL values to a pandas dataframe") @@ -111,11 +122,11 @@ { "Subject": sbj, "maPCA_AIC": aic, - "GIFT_AIC": 0, + "GIFT_AIC": giftmat['comp_est_AIC'][0][0], "maPCA_KIC": kic, - "GIFT_KIC": 0, + "GIFT_KIC": giftmat['comp_est_KIC'][0][0], "maPCA_MDL": mdl, - "GIFT_MDL": 0, + "GIFT_MDL": giftmat['compt_est_MDL'][0][0], }, ignore_index=True, ) From 936a715782ead36c10e8c23aff65496b6dcfaf55 Mon Sep 17 00:00:00 2001 From: leandrolecca Date: Mon, 28 Nov 2022 18:50:53 +0100 Subject: [PATCH 05/12] read gift.mat after --- comparison.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/comparison.py b/comparison.py index 9b7eeb9..41a39dd 100644 --- a/comparison.py +++ b/comparison.py @@ -108,14 +108,14 @@ cmd = f'matlab -nodesktop -nosplash -nojvm -logfile {sbj_dir}/giftoutput.txt -r "try;addpath(genpath(\'{gift}\'));sprintf(\'Subject dir: %s\',\'{datadir}\');[comp_est_AIC,comp_est_KIC,comp_est_MDL,mdl,aic,kic]=icatb_estimate_dimension(\'{tedana_optcom_mat}\',\'{tedana_mask_mat}\',\'double\',3);save(\'{sbj_dir}/gift.mat\',\'comp_est_AIC\',\'comp_est_KIC\',\'comp_est_MDL\');end;quit"' - giftmat=scipy_io.loadmat(os.path.join(sbj_dir,'gift.mat')) - proc=subprocess.Popen(cmd,shell=True,stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) output, err = proc.communicate() print(output.decode('utf-8')) - + + giftmat=scipy_io.loadmat(os.path.join(sbj_dir,'gift.mat')) + # Append AIC, KIC and MDL values to a pandas dataframe print("Appending AIC, KIC and MDL values to a pandas dataframe") results_df = results_df.append( From 67749e3404789208c0b05ca349e75efe1213b2e9 Mon Sep 17 00:00:00 2001 From: leandrolecca Date: Mon, 28 Nov 2022 20:29:33 +0100 Subject: [PATCH 06/12] edit --- comparison.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/comparison.py b/comparison.py index 41a39dd..8f760f1 100644 --- a/comparison.py +++ b/comparison.py @@ -106,7 +106,7 @@ # Here run matlab script with subprocess.run print("Running GIFT version of maPCA") - cmd = f'matlab -nodesktop -nosplash -nojvm -logfile {sbj_dir}/giftoutput.txt -r "try;addpath(genpath(\'{gift}\'));sprintf(\'Subject dir: %s\',\'{datadir}\');[comp_est_AIC,comp_est_KIC,comp_est_MDL,mdl,aic,kic]=icatb_estimate_dimension(\'{tedana_optcom_mat}\',\'{tedana_mask_mat}\',\'double\',3);save(\'{sbj_dir}/gift.mat\',\'comp_est_AIC\',\'comp_est_KIC\',\'comp_est_MDL\');end;quit"' + cmd = f'matlab -nodesktop -nosplash -nojvm -logfile {sbj_dir}/giftoutput.txt -r "try;addpath(genpath(\'{gift}\'));[comp_est_AIC,comp_est_KIC,comp_est_MDL,mdl,aic,kic]=icatb_estimate_dimension(\'{tedana_optcom_mat}\',\'{tedana_mask_mat}\',\'double\',3);save(\'{sbj_dir}/gift.mat\',\'comp_est_AIC\',\'comp_est_KIC\',\'comp_est_MDL\');end;quit"' proc=subprocess.Popen(cmd,shell=True,stdin=subprocess.PIPE, stdout=subprocess.PIPE, From 9e9367ee97faa0a189f9fcb1ef5bc8ed74a812e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eneko=20Uru=C3=B1uela?= Date: Wed, 30 Nov 2022 09:49:50 +0100 Subject: [PATCH 07/12] Apply normalization in space like GIFT --- mapca/mapca.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mapca/mapca.py b/mapca/mapca.py index 3de0b3e..65e0132 100644 --- a/mapca/mapca.py +++ b/mapca/mapca.py @@ -152,7 +152,7 @@ def _fit(self, img, mask): self.scaler_ = StandardScaler(with_mean=True, with_std=True) if self.normalize: # TODO: determine if tedana is already normalizing before this - X = self.scaler_.fit_transform(X.T).T # This was X_sc + X = self.scaler_.fit_transform(X) # This was X_sc # X = ((X.T - X.T.mean(axis=0)) / X.T.std(axis=0)).T LGR.info("Performing SVD on original data...") @@ -228,7 +228,7 @@ def _fit(self, img, mask): # Perform Variance Normalization temp_scaler = StandardScaler(with_mean=True, with_std=True) - dat = temp_scaler.fit_transform(dat.T).T + dat = temp_scaler.fit_transform(dat) # (completed) LGR.info("Performing SVD on subsampled i.i.d. data...") From 6213c4edcd8dc1e1084652150e00a26df72da9ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eneko=20Uru=C3=B1uela?= Date: Wed, 30 Nov 2022 09:50:00 +0100 Subject: [PATCH 08/12] Renamed comparison file --- comparison.py => mapca_on_openneuro.py | 28 +++++++++++++------------- 1 file changed, 14 insertions(+), 14 deletions(-) rename comparison.py => mapca_on_openneuro.py (82%) diff --git a/comparison.py b/mapca_on_openneuro.py similarity index 82% rename from comparison.py rename to mapca_on_openneuro.py index 8f760f1..7e23374 100644 --- a/comparison.py +++ b/mapca_on_openneuro.py @@ -105,28 +105,28 @@ # Here run matlab script with subprocess.run print("Running GIFT version of maPCA") - - cmd = f'matlab -nodesktop -nosplash -nojvm -logfile {sbj_dir}/giftoutput.txt -r "try;addpath(genpath(\'{gift}\'));[comp_est_AIC,comp_est_KIC,comp_est_MDL,mdl,aic,kic]=icatb_estimate_dimension(\'{tedana_optcom_mat}\',\'{tedana_mask_mat}\',\'double\',3);save(\'{sbj_dir}/gift.mat\',\'comp_est_AIC\',\'comp_est_KIC\',\'comp_est_MDL\');end;quit"' - - proc=subprocess.Popen(cmd,shell=True,stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + + cmd = f"matlab -nodesktop -nosplash -nojvm -logfile {sbj_dir}/giftoutput.txt -r \"try;addpath(genpath('{gift}'));[comp_est_AIC,comp_est_KIC,comp_est_MDL,mdl,aic,kic]=icatb_estimate_dimension('{tedana_optcom_mat}','{tedana_mask_mat}','double',3);save('{sbj_dir}/gift.mat','comp_est_AIC','comp_est_KIC','comp_est_MDL');end;quit\"" + + proc = subprocess.Popen( + cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) output, err = proc.communicate() - print(output.decode('utf-8')) - - giftmat=scipy_io.loadmat(os.path.join(sbj_dir,'gift.mat')) - + print(output.decode("utf-8")) + + giftmat = scipy_io.loadmat(os.path.join(sbj_dir, "gift.mat")) + # Append AIC, KIC and MDL values to a pandas dataframe print("Appending AIC, KIC and MDL values to a pandas dataframe") results_df = results_df.append( { "Subject": sbj, "maPCA_AIC": aic, - "GIFT_AIC": giftmat['comp_est_AIC'][0][0], + "GIFT_AIC": giftmat["comp_est_AIC"][0][0], "maPCA_KIC": kic, - "GIFT_KIC": giftmat['comp_est_KIC'][0][0], + "GIFT_KIC": giftmat["comp_est_KIC"][0][0], "maPCA_MDL": mdl, - "GIFT_MDL": giftmat['compt_est_MDL'][0][0], + "GIFT_MDL": giftmat["compt_est_MDL"][0][0], }, ignore_index=True, ) @@ -134,4 +134,4 @@ print("Subject", sbj, "done") # Save pandas dataframe to csv file -results_df.to_csv(os.path.join(wdir, "results.csv"), index=False) +results_df.to_csv(os.path.join(wdir, "results_python.csv"), index=False) From b5207845caba1f86322a24cea3fdb0f130cb8f59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eneko=20Uru=C3=B1uela?= Date: Wed, 30 Nov 2022 09:50:52 +0100 Subject: [PATCH 09/12] Make tedana mask binary --- mapca_on_openneuro.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mapca_on_openneuro.py b/mapca_on_openneuro.py index 7e23374..8f6b67e 100644 --- a/mapca_on_openneuro.py +++ b/mapca_on_openneuro.py @@ -81,6 +81,9 @@ tedana_optcom_img = nib.load(tedana_optcom) tedana_mask_img = nib.load(tedana_mask) + # Make mask binary + tedana_mask_img = nib.Nifti1Image(tedana_mask_img.get_fdata() > 0, tedana_mask_img.affine) + # Save tedana optimally combined data and mask into mat files tedana_optcom_mat = os.path.join(sbj_dir, "optcom_bold.mat") tedana_mask_mat = os.path.join(sbj_dir, "mask.mat") From 0d18f7257f22ee361563ea97a2fd61dea6cc9e8c Mon Sep 17 00:00:00 2001 From: leandrolecca Date: Wed, 30 Nov 2022 13:57:54 +0100 Subject: [PATCH 10/12] edits for matlab2014 --- icatb_estimate_dimension.m | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/icatb_estimate_dimension.m b/icatb_estimate_dimension.m index a206609..fb982e3 100644 --- a/icatb_estimate_dimension.m +++ b/icatb_estimate_dimension.m @@ -99,9 +99,9 @@ end end -if (ischar(maskvec) && endsWith(maskvec,'.mat')) +if (ischar(maskvec) && strcmp(maskvec(end-3:end), '.mat')) mask = load(maskvec); - maskvec = mask.data; + maskvec = mask.mask; else % If mask is supplied as a file, read data and convert data to % logical @@ -114,7 +114,7 @@ end %% Get dimensions of data by loading first time point -if (ischar(files) && ~endsWith(files,'.mat')) +if (ischar(files) && ~strcmp(files(end-3:end), '.mat')) files = icatb_rename_4d_file(files); first_file = deblank(files(1, :)); data = icatb_loadData(first_file); @@ -124,7 +124,7 @@ tdim = size(data, 4); end else - if (ischar(files) && endsWith(files,'.mat')) + if (ischar(files) && strcmp(files(end-3:end), '.mat')) data = load(files); data = data.data; else @@ -175,7 +175,7 @@ end %% Load data by reading timepoint at a time -if (ischar(files) && ~endsWith(files,'.mat')) +if (ischar(files) && ~strcmp(files(end-3:end), '.mat')) data = icatb_read_data(files, [], find(maskvec == 1), precisionType); else if (strcmpi(precisionType, 'single')) From e52cc561bb764aa7caba7a3ab26393e4ffe768a9 Mon Sep 17 00:00:00 2001 From: leandrolecca Date: Wed, 30 Nov 2022 14:03:28 +0100 Subject: [PATCH 11/12] mask for pca and gift execution --- mapca_on_openneuro.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/mapca_on_openneuro.py b/mapca_on_openneuro.py index 8f6b67e..be1bf29 100644 --- a/mapca_on_openneuro.py +++ b/mapca_on_openneuro.py @@ -11,14 +11,17 @@ from mapca import MovingAveragePCA repoid = "ds000258" -wdir = "/Users/eurunuela/Downloads" +wdir = "/bcbl/home/home_g-m/llecca/Scripts" repo = os.path.join(wdir, repoid) gift = "/bcbl/home/home_g-m/llecca/Scripts/gift" -subprocess.run( - f"cd {wdir} && datalad install git@github.com:OpenNeuroDatasets/{repoid}.git", shell=True -) +if os.path.exists(os.path.join(wdir,repoid)): + print('Repo exists') +else: + subprocess.run( + f"cd {wdir} && datalad install https://github.com/OpenNeuroDatasets/{repoid}.git", shell=True + ) subjects = [] @@ -82,7 +85,9 @@ tedana_mask_img = nib.load(tedana_mask) # Make mask binary - tedana_mask_img = nib.Nifti1Image(tedana_mask_img.get_fdata() > 0, tedana_mask_img.affine) + mask_array = tedana_mask_img.get_fdata() + mask_array[mask_array > 0] = 1 + tedana_mask_img = nib.Nifti1Image(mask_array, tedana_mask_img.affine) # Save tedana optimally combined data and mask into mat files tedana_optcom_mat = os.path.join(sbj_dir, "optcom_bold.mat") @@ -103,13 +108,13 @@ # Remove tedana output directory and the anat and func directories subprocess.run(f"rm -rf {tedana_output_dir}", shell=True, cwd=repo) - subprocess.run(f"rm -rf {sbj}/anat", shell=True, cwd=repo) - subprocess.run(f"rm -rf {sbj}/func", shell=True, cwd=repo) + subprocess.run(f"datalad drop {sbj}/anat", shell=True, cwd=repo) + subprocess.run(f"datalad drop {sbj}/func", shell=True, cwd=repo) # Here run matlab script with subprocess.run print("Running GIFT version of maPCA") - cmd = f"matlab -nodesktop -nosplash -nojvm -logfile {sbj_dir}/giftoutput.txt -r \"try;addpath(genpath('{gift}'));[comp_est_AIC,comp_est_KIC,comp_est_MDL,mdl,aic,kic]=icatb_estimate_dimension('{tedana_optcom_mat}','{tedana_mask_mat}','double',3);save('{sbj_dir}/gift.mat','comp_est_AIC','comp_est_KIC','comp_est_MDL');end;quit\"" + cmd = f'matlab -nodesktop -nosplash -nojvm -logfile {sbj_dir}/giftoutput.txt -r "addpath(genpath(\'{gift}\'));[comp_est_AIC,comp_est_KIC,comp_est_MDL,mdl,aic,kic]=icatb_estimate_dimension(\'{tedana_optcom_mat}\',\'{tedana_mask_mat}\',\'double\',3);save(\'{sbj_dir}/gift.mat\',\'comp_est_AIC\',\'comp_est_KIC\',\'comp_est_MDL\');quit"' proc = subprocess.Popen( cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE @@ -129,7 +134,7 @@ "maPCA_KIC": kic, "GIFT_KIC": giftmat["comp_est_KIC"][0][0], "maPCA_MDL": mdl, - "GIFT_MDL": giftmat["compt_est_MDL"][0][0], + "GIFT_MDL": giftmat["comp_est_MDL"][0][0], }, ignore_index=True, ) From c9925cbdc20cacedec0b11fd7c251c32b9fc9124 Mon Sep 17 00:00:00 2001 From: leandrolecca Date: Sat, 3 Dec 2022 21:03:25 +0100 Subject: [PATCH 12/12] online csv update --- mapca_on_openneuro.py | 233 +++++++++++++++++++++--------------------- 1 file changed, 116 insertions(+), 117 deletions(-) diff --git a/mapca_on_openneuro.py b/mapca_on_openneuro.py index be1bf29..65a06b7 100644 --- a/mapca_on_openneuro.py +++ b/mapca_on_openneuro.py @@ -1,5 +1,6 @@ import json import os +import csv import subprocess import nibabel as nib @@ -25,121 +26,119 @@ subjects = [] -# Create pandas dataframe to store results with the following columns: +# Create .csv to store results with the following columns: # Subject, maPCA_AIC, GIFT_AIC, maPCA_KIC, GIFT_KIC, maPCA_MDL, GIFT_MDL -results_df = pd.DataFrame( - columns=["Subject", "maPCA_AIC", "GIFT_AIC", "maPCA_KIC", "GIFT_KIC", "maPCA_MDL", "GIFT_MDL"] -) - - -for sbj in os.listdir(repo): - sbj_dir = os.path.join(repo, sbj) - - # Access subject directory - if os.path.isdir(sbj_dir) and "sub-" in sbj_dir: - echo_times = [] - func_files = [] - - subjects.append(os.path.basename(sbj_dir)) - - print("Downloading subject", sbj) - - subprocess.run(f"datalad get {sbj}/func", shell=True, cwd=repo) - - print("Searching for functional files and echo times") - - # Get functional filenames and echo times - for func_file in os.listdir(os.path.join(repo, sbj, "func")): - if func_file.endswith(".json"): - with open(os.path.join(repo, sbj, "func", func_file)) as f: - data = json.load(f) - echo_times.append(data["EchoTime"]) - elif func_file.endswith(".nii.gz"): - func_files.append(os.path.join(repo, sbj, "func", func_file)) - - # Sort echo_times values from lowest to highest and multiply by 1000 - echo_times = np.array(sorted(echo_times)) * 1000 - - # Sort func_files - func_files = sorted(func_files) - - # Tedana output directory - tedana_output_dir = os.path.join(sbj_dir, "tedana") - - print("Running tedana") - - # Run tedana - tedana_cli.tedana_workflow( - data=func_files, - tes=echo_times, - out_dir=tedana_output_dir, - tedpca="mdl", - ) - - # Find tedana optimally combined data and mask - tedana_optcom = os.path.join(tedana_output_dir, "desc-optcom_bold.nii.gz") - tedana_mask = os.path.join(tedana_output_dir, "desc-adaptiveGoodSignal_mask.nii.gz") - - # Read tedana optimally combined data and mask - tedana_optcom_img = nib.load(tedana_optcom) - tedana_mask_img = nib.load(tedana_mask) - - # Make mask binary - mask_array = tedana_mask_img.get_fdata() - mask_array[mask_array > 0] = 1 - tedana_mask_img = nib.Nifti1Image(mask_array, tedana_mask_img.affine) - - # Save tedana optimally combined data and mask into mat files - tedana_optcom_mat = os.path.join(sbj_dir, "optcom_bold.mat") - tedana_mask_mat = os.path.join(sbj_dir, "mask.mat") - print("Saving tedana optimally combined data and mask into mat files") - scipy_io.savemat(tedana_optcom_mat, {"data": tedana_optcom_img.get_fdata()}) - scipy_io.savemat(tedana_mask_mat, {"mask": tedana_mask_img.get_fdata()}) - - # Run mapca - print("Running mapca") - pca = MovingAveragePCA(normalize=True) - _ = pca.fit_transform(tedana_optcom_img, tedana_mask_img) - - # Get AIC, KIC and MDL values - aic = pca.aic_ - kic = pca.kic_ - mdl = pca.mdl_ - - # Remove tedana output directory and the anat and func directories - subprocess.run(f"rm -rf {tedana_output_dir}", shell=True, cwd=repo) - subprocess.run(f"datalad drop {sbj}/anat", shell=True, cwd=repo) - subprocess.run(f"datalad drop {sbj}/func", shell=True, cwd=repo) - - # Here run matlab script with subprocess.run - print("Running GIFT version of maPCA") - - cmd = f'matlab -nodesktop -nosplash -nojvm -logfile {sbj_dir}/giftoutput.txt -r "addpath(genpath(\'{gift}\'));[comp_est_AIC,comp_est_KIC,comp_est_MDL,mdl,aic,kic]=icatb_estimate_dimension(\'{tedana_optcom_mat}\',\'{tedana_mask_mat}\',\'double\',3);save(\'{sbj_dir}/gift.mat\',\'comp_est_AIC\',\'comp_est_KIC\',\'comp_est_MDL\');quit"' - - proc = subprocess.Popen( - cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE - ) - output, err = proc.communicate() - print(output.decode("utf-8")) - - giftmat = scipy_io.loadmat(os.path.join(sbj_dir, "gift.mat")) - - # Append AIC, KIC and MDL values to a pandas dataframe - print("Appending AIC, KIC and MDL values to a pandas dataframe") - results_df = results_df.append( - { - "Subject": sbj, - "maPCA_AIC": aic, - "GIFT_AIC": giftmat["comp_est_AIC"][0][0], - "maPCA_KIC": kic, - "GIFT_KIC": giftmat["comp_est_KIC"][0][0], - "maPCA_MDL": mdl, - "GIFT_MDL": giftmat["comp_est_MDL"][0][0], - }, - ignore_index=True, - ) - - print("Subject", sbj, "done") - -# Save pandas dataframe to csv file -results_df.to_csv(os.path.join(wdir, "results_python.csv"), index=False) +with open(os.path.join(wdir,"mapca_gift_openneuro.csv"),"a") as csv_file: + writer = csv.writer(csv_file,delimiter="\t") + writer.writerow(["Subject", "maPCA_AIC", "GIFT_AIC", "maPCA_KIC", + "GIFT_KIC", "maPCA_MDL","GIFT_MDL"]) + + for sbj in os.listdir(repo): + sbj_dir = os.path.join(repo, sbj) + + # Access subject directory + if os.path.isdir(sbj_dir) and "sub-" in sbj_dir: + echo_times = [] + func_files = [] + + subjects.append(os.path.basename(sbj_dir)) + + print("Downloading subject", sbj) + + subprocess.run(f"datalad get {sbj}/func", shell=True, cwd=repo) + + print("Searching for functional files and echo times") + + # Get functional filenames and echo times + for func_file in os.listdir(os.path.join(repo, sbj, "func")): + if func_file.endswith(".json"): + with open(os.path.join(repo, sbj, "func", func_file)) as f: + data = json.load(f) + echo_times.append(data["EchoTime"]) + elif func_file.endswith(".nii.gz"): + func_files.append(os.path.join(repo, sbj, "func", func_file)) + + # Sort echo_times values from lowest to highest and multiply by 1000 + echo_times = np.array(sorted(echo_times)) * 1000 + + # Sort func_files + func_files = sorted(func_files) + + # Tedana output directory + tedana_output_dir = os.path.join(sbj_dir, "tedana") + + print("Running tedana") + + # Run tedana + try: + tedana_cli.tedana_workflow( + data=func_files, + tes=echo_times, + out_dir=tedana_output_dir, + tedpca="mdl", + ) + except: + print("Something went wrong in "+sbj_dir+", check .nii.gz") + continue + + # Find tedana optimally combined data and mask + tedana_optcom = os.path.join(tedana_output_dir, "desc-optcom_bold.nii.gz") + tedana_mask = os.path.join(tedana_output_dir, "desc-adaptiveGoodSignal_mask.nii.gz") + + # Read tedana optimally combined data and mask + tedana_optcom_img = nib.load(tedana_optcom) + tedana_mask_img = nib.load(tedana_mask) + + # Make mask binary + mask_array = tedana_mask_img.get_fdata() + mask_array[mask_array > 0] = 1 + tedana_mask_img = nib.Nifti1Image(mask_array, tedana_mask_img.affine) + + # Save tedana optimally combined data and mask into mat files + tedana_optcom_mat = os.path.join(sbj_dir, "optcom_bold.mat") + tedana_mask_mat = os.path.join(sbj_dir, "mask.mat") + print("Saving tedana optimally combined data and mask into mat files") + scipy_io.savemat(tedana_optcom_mat, {"data": tedana_optcom_img.get_fdata()}) + scipy_io.savemat(tedana_mask_mat, {"mask": tedana_mask_img.get_fdata()}) + + # Run mapca + print("Running mapca") + pca = MovingAveragePCA(normalize=True) + _ = pca.fit_transform(tedana_optcom_img, tedana_mask_img) + + # Get AIC, KIC and MDL values + aic = pca.aic_ + kic = pca.kic_ + mdl = pca.mdl_ + + # Remove tedana output directory and the anat and func directories + subprocess.run(f"rm -rf {tedana_output_dir}", shell=True, cwd=repo) + subprocess.run(f"datalad drop {sbj}/anat", shell=True, cwd=repo) + subprocess.run(f"datalad drop {sbj}/func", shell=True, cwd=repo) + + # Here run matlab script with subprocess.run + print("Running GIFT version of maPCA") + + cmd = f'matlab -nodesktop -nosplash -nojvm -logfile {sbj_dir}/giftoutput.txt -r "addpath(genpath(\'{gift}\'));[comp_est_AIC,comp_est_KIC,comp_est_MDL,mdl,aic,kic]=icatb_estimate_dimension(\'{tedana_optcom_mat}\',\'{tedana_mask_mat}\',\'double\',3);save(\'{sbj_dir}/gift.mat\',\'comp_est_AIC\',\'comp_est_KIC\',\'comp_est_MDL\');quit"' + + proc = subprocess.Popen( + cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) + output, err = proc.communicate() + print(output.decode("utf-8")) + + giftmat = scipy_io.loadmat(os.path.join(sbj_dir, "gift.mat")) + + # Append AIC, KIC and MDL values to a pandas dataframe + print("Appending AIC, KIC and MDL values to csv file") + writer.writerow([sbj, + aic["n_components"], + giftmat["comp_est_AIC"][0][0], + kic["n_components"], + giftmat["comp_est_KIC"][0][0], + mdl["n_components"], + giftmat["comp_est_MDL"][0][0]]) + csv_file.flush() + print("Subject", sbj, "done") + + csv_file.close()