From 57b3d33bcc91434e62281d96252749956eded350 Mon Sep 17 00:00:00 2001
From: leandrolecca <lean.lecca.96@icloud.com>
Date: Fri, 25 Nov 2022 11:17:59 +0100
Subject: [PATCH 01/12] python vs matlab

---
 comparison.py | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)
 create mode 100644 comparison.py

diff --git a/comparison.py b/comparison.py
new file mode 100644
index 0000000..d44c0e4
--- /dev/null
+++ b/comparison.py
@@ -0,0 +1,66 @@
+import pandas as pd
+import os
+import json
+#import subprocess
+
+repoid="ds000258"
+#repourl="git@github.com:OpenNeuroDatasets/"
+#wdir="/bcbl/home/home_g-m/llecca/Scripts/gift/dev"
+wdir=os.getcwd()
+
+#subprocess.run(["cd",wdir])
+#os.system(["cd", wdir])
+
+os.system("datalad install git@github.com:OpenNeuroDatasets/"+repoid+".git")
+repo=os.path.join(wdir,repoid)
+
+subjects=[]
+
+for file in os.listdir(repo):
+    d = os.path.join(repo,file)
+    if os.path.isdir(d) and "sub-" in d:
+        subjects.append(os.path.basename(d))
+
+json_files=[]
+echo_times=[]
+
+for sbj in subjects:
+    for file in os.listdir(os.path.join(repo,sbj,'func')):
+        if file.endswith('.json'):
+           f=open(os.path.join(repo,sbj,'func',file))
+           data=json.load(f)
+           echo_times.append(data["EchoTime"])
+           json_files.append(file)
+           f.close()
+
+#df_echo=pd.DataFrame(list(zip(json_files,echo_times)),columns=['file','echo time'])
+
+df_echo = pd.DataFrame({'file': json_files, 'echo time': echo_times})
+
+#write.csv(df_echo,os.path.join(wdir,'echo_times_'+repoid+'.csv'),row.names=FALSE)
+df_echo.to_csv(os.path.join(wdir,'echo_times_'+repoid+'.csv'),index=False)
+
+
+# here run matlab script
+
+
+
+
+
+
+
+#df=pd.read_csv(os.path.join(wdir,'participants.tsv'),sep='\t')
+#subjects=df['participant_id']
+
+#for sbj in subjects:
+#    print(os.path.join(sbj,'func'))
+#    for jsonfile in os.listdir(os.path.join(wdir,sbj,'func')):
+#        if jsonfile.endswith('.json'):
+#            f=open(jsonfile)
+#            data=json.load(f)
+#            print(data['EchoTime'])
+#            f.close()
+
+
+    #datalad get os.path.join(sbj,'func')
+    #datalad drop os.path.join(sbj,'func')

From 589a4bea392ccfaf07f048578d3cdbcba18aae41 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eneko=20Uru=C3=B1uela?= <e.urunuela@icloud.com>
Date: Fri, 25 Nov 2022 12:29:16 +0100
Subject: [PATCH 02/12] Made a working version of the Python side of things

TODO: Call MATLAB function from the Python script.
---
 comparison.py | 146 +++++++++++++++++++++++++++++++++++---------------
 1 file changed, 103 insertions(+), 43 deletions(-)

diff --git a/comparison.py b/comparison.py
index d44c0e4..8d62974 100644
--- a/comparison.py
+++ b/comparison.py
@@ -1,66 +1,126 @@
-import pandas as pd
-import os
 import json
-#import subprocess
+import os
+import subprocess
+
+import nibabel as nib
+import numpy as np
+import pandas as pd
+from scipy import io as scipy_io
+from tedana.workflows import tedana as tedana_cli
+
+from mapca import MovingAveragePCA
+
+repoid = "ds000258"
+wdir = "/Users/eurunuela/Downloads"
+
+repo = os.path.join(wdir, repoid)
 
-repoid="ds000258"
-#repourl="git@github.com:OpenNeuroDatasets/"
-#wdir="/bcbl/home/home_g-m/llecca/Scripts/gift/dev"
-wdir=os.getcwd()
+subprocess.run(
+    f"cd {wdir} && datalad install git@github.com:OpenNeuroDatasets/{repoid}.git", shell=True
+)
+
+subjects = []
+
+# Create pandas dataframe to store results with the following columns:
+# Subject, maPCA_AIC, GIFT_AIC, maPCA_KIC, GIFT_KIC, maPCA_MDL, GIFT_MDL
+results_df = pd.DataFrame(
+    columns=["Subject", "maPCA_AIC", "GIFT_AIC", "maPCA_KIC", "GIFT_KIC", "maPCA_MDL", "GIFT_MDL"]
+)
 
-#subprocess.run(["cd",wdir])
-#os.system(["cd", wdir])
 
-os.system("datalad install git@github.com:OpenNeuroDatasets/"+repoid+".git")
-repo=os.path.join(wdir,repoid)
+for sbj in os.listdir(repo):
+    sbj_dir = os.path.join(repo, sbj)
 
-subjects=[]
+    # Access subject directory
+    if os.path.isdir(sbj_dir) and "sub-" in sbj_dir:
+        echo_times = []
+        func_files = []
 
-for file in os.listdir(repo):
-    d = os.path.join(repo,file)
-    if os.path.isdir(d) and "sub-" in d:
-        subjects.append(os.path.basename(d))
+        subjects.append(os.path.basename(sbj_dir))
 
-json_files=[]
-echo_times=[]
+        print("Downloading subject", sbj)
 
-for sbj in subjects:
-    for file in os.listdir(os.path.join(repo,sbj,'func')):
-        if file.endswith('.json'):
-           f=open(os.path.join(repo,sbj,'func',file))
-           data=json.load(f)
-           echo_times.append(data["EchoTime"])
-           json_files.append(file)
-           f.close()
+        subprocess.run(f"datalad get {sbj}/func", shell=True, cwd=repo)
 
-#df_echo=pd.DataFrame(list(zip(json_files,echo_times)),columns=['file','echo time'])
+        print("Searching for functional files and echo times")
 
-df_echo = pd.DataFrame({'file': json_files, 'echo time': echo_times})
+        # Get functional filenames and echo times
+        for func_file in os.listdir(os.path.join(repo, sbj, "func")):
+            if func_file.endswith(".json"):
+                with open(os.path.join(repo, sbj, "func", func_file)) as f:
+                    data = json.load(f)
+                    echo_times.append(data["EchoTime"])
+            elif func_file.endswith(".nii.gz"):
+                func_files.append(os.path.join(repo, sbj, "func", func_file))
 
-#write.csv(df_echo,os.path.join(wdir,'echo_times_'+repoid+'.csv'),row.names=FALSE)
-df_echo.to_csv(os.path.join(wdir,'echo_times_'+repoid+'.csv'),index=False)
+        # Sort echo_times values from lowest to highest and multiply by 1000
+        echo_times = np.array(sorted(echo_times)) * 1000
 
+        # Sort func_files
+        func_files = sorted(func_files)
 
-# here run matlab script
+        # Tedana output directory
+        tedana_output_dir = os.path.join(sbj_dir, "tedana")
 
+        print("Running tedana")
 
+        # Run tedana
+        tedana_cli.tedana_workflow(
+            data=func_files,
+            tes=echo_times,
+            out_dir=tedana_output_dir,
+            tedpca="mdl",
+        )
 
+        # Find tedana optimally combined data and mask
+        tedana_optcom = os.path.join(tedana_output_dir, "desc-optcom_bold.nii.gz")
+        tedana_mask = os.path.join(tedana_output_dir, "desc-adaptiveGoodSignal_mask.nii.gz")
 
+        # Read tedana optimally combined data and mask
+        tedana_optcom_img = nib.load(tedana_optcom)
+        tedana_mask_img = nib.load(tedana_mask)
 
+        # Save tedana optimally combined data and mask into mat files
+        tedana_optcom_mat = os.path.join(sbj_dir, "optcom_bold.mat")
+        tedana_mask_mat = os.path.join(sbj_dir, "mask.mat")
+        print("Saving tedana optimally combined data and mask into mat files")
+        scipy_io.savemat(tedana_optcom_mat, {"data": tedana_optcom_img.get_fdata()})
+        scipy_io.savemat(tedana_mask_mat, {"mask": tedana_mask_img.get_fdata()})
 
+        # Run mapca
+        print("Running mapca")
+        pca = MovingAveragePCA(normalize=True)
+        _ = pca.fit_transform(tedana_optcom_img, tedana_mask_img)
 
-#df=pd.read_csv(os.path.join(wdir,'participants.tsv'),sep='\t')
-#subjects=df['participant_id']
+        # Get AIC, KIC and MDL values
+        aic = pca.aic_
+        kic = pca.kic_
+        mdl = pca.mdl_
 
-#for sbj in subjects:
-#    print(os.path.join(sbj,'func'))
-#    for jsonfile in os.listdir(os.path.join(wdir,sbj,'func')):
-#        if jsonfile.endswith('.json'):
-#            f=open(jsonfile)
-#            data=json.load(f)
-#            print(data['EchoTime'])
-#            f.close()
+        # Remove tedana output directory and the anat and func directories
+        subprocess.run(f"rm -rf {tedana_output_dir}", shell=True, cwd=repo)
+        subprocess.run(f"rm -rf {sbj}/anat", shell=True, cwd=repo)
+        subprocess.run(f"rm -rf {sbj}/func", shell=True, cwd=repo)
 
+        # Here run matlab script with subprocess.run
+        print("Running GIFT version of maPCA")
 
-    #datalad get os.path.join(sbj,'func')
-    #datalad drop os.path.join(sbj,'func')
+        # Append AIC, KIC and MDL values to a pandas dataframe
+        print("Appending AIC, KIC and MDL values to a pandas dataframe")
+        results_df = results_df.append(
+            {
+                "Subject": sbj,
+                "maPCA_AIC": aic,
+                "GIFT_AIC": 0,
+                "maPCA_KIC": kic,
+                "GIFT_KIC": 0,
+                "maPCA_MDL": mdl,
+                "GIFT_MDL": 0,
+            },
+            ignore_index=True,
+        )
+
+        print("Subject", sbj, "done")
+
+# Save pandas dataframe to csv file
+results_df.to_csv(os.path.join(wdir, "results.csv"), index=False)

From 5405041d8ec941440afba39ac3d5efa9dd12c893 Mon Sep 17 00:00:00 2001
From: leandrolecca <lean.lecca.96@icloud.com>
Date: Mon, 28 Nov 2022 18:40:43 +0100
Subject: [PATCH 03/12] edited icatb_estimate_dimension

---
 icatb_estimate_dimension.m | 1005 ++++++++++++++++++++++++++++++++++++
 1 file changed, 1005 insertions(+)
 create mode 100644 icatb_estimate_dimension.m

diff --git a/icatb_estimate_dimension.m b/icatb_estimate_dimension.m
new file mode 100644
index 0000000..a206609
--- /dev/null
+++ b/icatb_estimate_dimension.m
@@ -0,0 +1,1005 @@
+function [comp_est_AIC, comp_est_KIC, comp_est_MDL, mdl, aic, kic] = icatb_estimate_dimension(files, maskvec, precisionType, dim_est_opts)
+% function [comp_est, mdl, aic, kic] = icatb_estimate_dimension(files, maskvec)
+% Select the order of the multivariate data using Information theoretic
+% criteria with the option of sample dependence correction;
+% Inputs:
+%
+% 1. files - fullfile path of data files as a character array or
+% numeric array of size xdim by ydim by zdim by tdim.
+% 2. maskvec - Mask file in analyze or Nifti format. Mask must have the
+% same dimensions as the data or mask vector must be in logical vector format of
+% of length xdim*ydim*zdim.
+% 3. precisionType - Load data as double or single
+% 4. dim_est_opts - Dimensionality options. Options to select method and if
+% you specified method = 2 set fwhm variable as well.
+%
+% Output:
+% 1. comp_est_AIC: estimated order using AIC
+% 2. comp_est_KIC: estimated order using KIC
+% 3. comp_est_MDL: estimated order using MDL
+% 4. mdl - MDL vector
+% 5. aic - AIC vector
+% 6. kic - KIC vector
+
+% Please cite the following paper if you use this code for publication
+% Y.-O. Li, T. Adali and V. D. Calhoun, "Estimating the number of independent
+% components for fMRI data," Human Brain Mapping, vol. 28, pp. 1251--66, 2007
+
+
+%COPYRIGHT NOTICE
+%This function is a part of GIFT software library
+%Copyright (C) 2003-2009
+%
+%This program is free software; you can redistribute it and/or
+%modify it under the terms of the GNU General Public License
+%as published by the Free Software Foundation; either version 2
+%of the License, or any later version.
+%
+%This program is distributed in the hope that it will be useful,
+%but WITHOUT ANY WARRANTY; without even the implied warranty of
+%MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+%GNU General Public License for more details.
+%
+%You should have received a copy of the GNU General Public License
+%along with this program; if not, write to the Free Software
+%Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+
+icatb_defaults;
+global DIM_ESTIMATION_OPTS;
+
+if (~exist('files', 'var'))
+    files = icatb_selectEntry('typeSelection', 'multiple', 'filter', '*.img', 'title', 'Select functional data files');
+end
+
+drawnow;
+
+if (~exist('maskvec', 'var'))
+    maskvec = [];
+end
+
+if (~exist('precisionType', 'var'))
+    precisionType = 'double';
+end
+
+dim_est_method = 1;
+fwhm = [];
+
+if (~exist('dim_est_opts', 'var') || isempty(dim_est_opts))
+    dim_est_opts = DIM_ESTIMATION_OPTS;
+end
+
+try
+    dim_est_method = dim_est_opts.method;
+catch
+end
+
+
+% try
+%     dim_est_method = DIM_ESTIMATION_OPTS.method;
+% catch
+%     if (isfield(DIM_ESTIMATION_OPTS, 'iid_sampling'))
+%         if (DIM_ESTIMATION_OPTS.iid_sampling == 1)
+%             % iid sampling on
+%             dim_est_method = 1;
+%         else
+%             % iid sampling off
+%             dim_est_method = 2;
+%         end
+%     end
+% end
+
+try
+    fwhm = dim_est_opts.fwhm;
+catch
+end
+
+if (dim_est_method == 2)
+    if (isempty(fwhm))
+        error('Please set fwhm smoothness factor using DIM_ESTIMATION_OPTS.fwhm when DIM_ESTIMATION_OPTS.method = 2');
+    end
+end
+
+if (ischar(maskvec) && endsWith(maskvec,'.mat'))
+    mask = load(maskvec);
+    maskvec = mask.data;
+else
+    % If mask is supplied as a file, read data and convert data to
+    % logical
+    maskvec = icatb_rename_4d_file(maskvec);
+    % Load only the first file
+    maskvec = deblank(maskvec(1,:));
+    maskvec = icatb_loadData(maskvec);
+
+    maskvec = icatb_loadData(deblank(maskvec(1, :)));
+end
+
+%% Get dimensions of data by loading first time point
+if (ischar(files) && ~endsWith(files,'.mat'))
+    files = icatb_rename_4d_file(files);
+    first_file = deblank(files(1, :));
+    data = icatb_loadData(first_file);
+    xdim = size(data, 1); ydim = size(data, 2); zdim = size(data, 3); tdim = size(files, 1);
+    first_file = icatb_parseExtn(first_file);
+    if (strcmpi(first_file(end-3:end), '.mat'))
+        tdim = size(data, 4);
+    end
+else
+    if (ischar(files) && endsWith(files,'.mat'))
+        data = load(files);
+        data = data.data;
+    else
+        data = files;
+    end
+
+    if (length(size(data)) == 4)
+        xdim = size(data, 1); ydim = size(data, 2); zdim = size(data, 3); tdim = size(data, 4);
+    else
+        if (length(size(maskvec)) == 3)
+            xdim = size(maskvec, 1); ydim = size(maskvec, 2); zdim = size(maskvec, 3); tdim = size(data, 2);
+            if (size(data, 1) ~= xdim*ydim*zdim)
+                error('Mask dimensions (x, y, z) don''t match data dimensions (x, y, z)');
+            end
+        else
+            error('Cannot get the x,y,z dimension information from the mask or the data. Data must be 4D array or mask must be 3D array');
+        end
+    end
+    
+end
+
+%% Load mask and check the dimensions of the data
+if (isempty(maskvec))
+    
+    if (length(size(data)) == 4)
+        tmp = data(:, :, :, 1);
+    elseif ((length(size(data)) == 3) && (numel(data) == xdim*ydim*zdim))
+        tmp = data(:);
+    elseif ((length(size(data)) == 2) && (numel(data) == xdim*ydim*zdim))
+        tmp = data(:);
+    else
+        tmp = data(:, 1);
+    end
+    
+    
+    maskvec = (tmp(:) >= abs(mean(tmp(:))));
+    
+else
+    
+    % Get mask vector
+    maskvec = (maskvec ~= 0);
+    maskvec = maskvec(:);
+    
+end
+
+if (length(maskvec) ~= xdim*ydim*zdim)
+    error('Error:MaskDIM', 'Mask dimensions (%d) doesn''t match the data (%d)', length(maskvec), xdim*ydim*zdim);
+end
+
+%% Load data by reading timepoint at a time
+if (ischar(files) && ~endsWith(files,'.mat'))
+    data = icatb_read_data(files, [], find(maskvec == 1), precisionType);
+else
+    if (strcmpi(precisionType, 'single'))
+        data = single(data);
+    end
+    data = reshape(data, xdim*ydim*zdim, tdim);
+    data = data(maskvec, :);
+end
+
+verbose = 1;
+
+mdl = [];
+aic = [];
+kic = [];
+
+if ((dim_est_method == 3) || (dim_est_method == 4))
+    % Use ER-FM or ER-AR
+    
+    if (verbose)
+        disp('Using entropy rate estimator ...');
+    end
+    
+    [ER_FM, ER_AR] = orderEstimation_HdsvsEig_R(data');
+    
+    if (dim_est_method == 3)
+        disp('Order estimated by entropy rate based method assumes signal has finite memory length ...');
+        comp_est = ER_FM;
+    else
+        disp('Order estimated by entropy rate based method assumes AR signal ...');
+        comp_est = ER_AR;
+    end
+    
+    return;
+    
+end
+
+
+if (dim_est_method == 1)
+    
+    %% Perform variance normalization
+    if verbose
+        fprintf('\n Performing variance normalization ...');
+    end
+    
+    for n = 1:size(data, 2)
+        data(:, n) = detrend(data(:, n), 0) ./ std(data(:, n));
+    end
+    
+    %% (completed)
+    
+    fprintf('\n P1:');
+    [V, EigenValues] = icatb_svd(data, tdim, 'verbose', 0);
+    EigenValues = diag(EigenValues);
+    EigenValues = EigenValues(end:-1:1);
+    dataN = data*V(:, end:-1:1);
+    clear V;
+    
+    %% Select Gaussian principal components for effectively i.i.d. sample estimation
+    if verbose
+        fprintf('\n Selecting Gaussian principal components ...');
+    end
+    
+    %% Using 12 gaussian components from middle, top and bottom gaussian components to determine the subsampling depth. Final subsampling depth is determined using median
+    kurtv1 = kurtn(dataN);
+    kurtv1(EigenValues > mean(EigenValues)) = 1000;
+    idx_gauss = find(((kurtv1(:) < 0.3) .* (EigenValues > eps)) == 1);
+    idx = idx_gauss(:)';
+    dfs = length(find(EigenValues > eps)); % degrees of freedom
+    minTp = 12;
+    
+    if (length(idx) >= minTp)
+        middle = round(length(idx)/2) + 1;
+        idx = [idx(1:4), idx(middle - 1:middle + 2), idx(end-3:end)];
+    elseif isempty(idx)
+        minTp = min([minTp, dfs]);
+        idx = (dfs - minTp + 1:dfs);
+    end
+    
+    idx = unique(idx);
+    
+    %% Estimate the subsampling depth for effectively i.i.d. samples
+    if verbose
+        fprintf('\n\n Estimate effectively i.i.d. samples: ');
+    end
+    
+    mask_ND = reshape(maskvec, xdim, ydim, zdim);
+    ms = length(idx);
+    s = zeros(ms,1);
+    for i = 1:ms
+        x_single = zeros(xdim*ydim*zdim, 1);
+        x_single(maskvec) = dataN(:, idx(i));
+        x_single = reshape(x_single, xdim, ydim, zdim);
+        s(i) = est_indp_sp(x_single);
+        if (i > 6)
+            tmpS = s(1:i);
+            tmpSMedian = round(median(tmpS));
+            if (length(find(tmpS == tmpSMedian)) > 6)
+                s = tmpS;
+                break;
+            end
+        end
+        dim_n = prod(size(size(x_single)));
+        clear x_single;
+    end
+    clear dataN;
+    fprintf('\n');
+    s1 = round(median(s));
+    if floor((sum(maskvec)/(1*tdim))^(1/dim_n)) < s1
+        s1 = floor((sum(maskvec)/(1*tdim))^(1/dim_n));
+    end
+    N = round(sum(maskvec)/s1^dim_n);
+    %% (completed)
+    
+    %% Use the subsampled dataset to calculate eigen values
+    if verbose
+        fprintf('\n Perform EVD on the effectively i.i.d. samples ...');
+    end
+    if s1 ~= 1
+        mask_s = subsampling(mask_ND, s1, [1,1,1]);
+        mask_s_1d = reshape(mask_s, 1, prod(size(mask_s)));
+        dat = zeros(length(find(mask_s_1d == 1)), tdim);
+        for i = 1:tdim
+            x_single = zeros(xdim*ydim*zdim, 1);
+            x_single(maskvec) = data(:, i);
+            x_single = reshape(x_single, xdim, ydim, zdim);
+            dat0 = subsampling(x_single, s1, [1, 1, 1]);
+            dat0 = reshape(dat0, prod(size(dat0)), 1);
+            dat(:, i) = dat0(mask_s_1d);
+            clear x_single;
+        end
+        clear data;
+        %% Perform variance normalization
+        for n = 1:size(dat, 2)
+            dat(:, n) = detrend(dat(:, n), 0) ./ std(dat(:, n));
+        end
+        %% (completed)
+        fprintf('\n P2:');
+        [V, EigenValues] = icatb_svd(dat, tdim, 'verbose', 0);
+        EigenValues = diag(EigenValues);
+        EigenValues = EigenValues(end:-1:1);
+        %lam = EigenValues;
+    end
+    
+    lam = EigenValues;
+    clear dat;
+    
+    if verbose
+        fprintf('\n Effective number of i.i.d. samples: %d \n',N);
+    end
+    
+    %% Make eigen spectrum adjustment
+    if verbose
+        fprintf('\n Perform eigen spectrum adjustment ...');
+    end
+    lam = eigensp_adj(lam, N, length(lam));
+    %% (completed)
+    
+    if sum(imag(lam))
+        error('Invalid eigen value found for the subsampled data.');
+    end
+    
+    
+else
+    %% No iid sampling
+    if verbose
+        fprintf('\n computing eigen values ...\n');
+    end
+    
+    [V, EigenValues] = icatb_svd(data, tdim, 'verbose', 0);
+    EigenValues = diag(EigenValues);
+    EigenValues = EigenValues(end:-1:1);
+    lam = EigenValues;
+    N = ceil(size(data, 1) / prod(fwhm));
+    
+end
+
+%% Correction on the ill-conditioned results (when tdim is large, some least significant eigenvalues become small negative numbers)
+lam(real(lam) <= eps) = min(lam(real(lam)>= eps));
+
+if verbose
+    fprintf('\n Estimating the dimension ...');
+end
+p = tdim;
+aic = zeros(1, p - 1);
+kic = zeros(1, p - 1);
+mdl = zeros(1, p - 1);
+for k = 1:p-1
+    LH = log(prod( lam(k+1:end).^(1/(p-k)) )/mean(lam(k+1:end)));
+    mlh = 0.5*N*(p-k)*LH;
+    df = 1 + 0.5*k*(2*p-k+1);
+    aic(k) =  -2*mlh + 2*df;
+    kic(k) =  -2*mlh + 3*df;
+    mdl(k) =  -mlh + 0.5*df*log(N);
+end
+
+% Find the first local minimum of each ITC
+itc = zeros(3, length(mdl));
+itc(1,:) = aic;
+itc(2,:) = kic;
+itc(3,:) = mdl;
+
+%% Estimated components using MDL
+dlap = squeeze(itc(3, 2:end) - itc(3, 1:end-1));
+a3 = find(dlap > 0);
+
+dlap2 = squeeze(itc(2, 2:end) - itc(2, 1:end-1));
+a2 = find(dlap2>0);
+
+dlap1 = squeeze(itc(1, 2:end) - itc(1, 1:end-1));
+a1 = find(dlap1>0);
+
+
+if isempty(a3)
+    comp_est_MDL = length(squeeze(itc(3, :)));
+else
+    comp_est_MDL = a3(1);
+end
+
+if isempty(a1)
+    comp_est_AIC = length(squeeze(itc(1, :)));
+else
+    comp_est_AIC = a1(1);
+end
+
+if isempty(a2)
+    comp_est_KIC = length(squeeze(itc(2, :)));
+else
+    comp_est_KIC = a2(1);
+end
+
+fprintf('\n');
+disp(['Estimated components with mdl is found out to be ', num2str(comp_est_MDL)]);
+
+fprintf('\n');
+disp(['Estimated components with aic is found out to be ', num2str(comp_est_AIC)]);
+
+fprintf('\n');
+disp(['Estimated components with kic is found out to be ', num2str(comp_est_KIC)]);
+%fprintf('\n');
+
+function out = subsampling(x,s,x0)
+% Subsampling the data evenly with space 's'
+
+n = size(x);
+
+if max(size(n)) == 2 & min(n) == 1  % 1D
+    out = x([x0(1):s:max(n)]);
+    
+elseif max(size(n)) == 2 & min(n) ~= 1  % 2D
+    out = x([x0(1):s:n(1)],[x0(2):s:n(2)]);
+    
+elseif max(size(n)) == 3 & min(n) ~= 1  % 3D
+    out = x([x0(1):s:n(1)],[x0(2):s:n(2)],[x0(3):s:n(3)]);
+    
+else
+    error('Unrecognized matrix dimension!(subsampling)');
+    
+end
+
+
+function [s, entrate_m] = est_indp_sp(x)
+% estimate the effective number of independent samples based on the maximum
+% entropy rate principle of stationary random process
+
+
+dimv = size(x);
+s0 = 0;
+fprintf('\n');
+for j = 1:min(dimv)-1
+    x_sb = subsampling(x,j,[1,1,1]);
+    if j == 1
+        fprintf('\n Estimating the entropy rate of the Gaussian component with subsampling depth %d,',j);
+    else
+        fprintf(' %d,',j);
+    end
+    entrate_m = entrate_sp(x_sb,1);
+    
+    ent_ref = 1.41;
+    if entrate_m > ent_ref
+        s0 = j; break;
+    end
+end
+fprintf(' Done;');
+if s0 == 0
+    error('Ill conditioned data, can not estimate independent samples.(est_indp_sp)');
+else
+    s = s0;
+end
+
+
+function out = entrate_sp(x, sm_window)
+% Calculate the entropy rate of a stationary Gaussian random process using
+% spectrum estimation with smoothing window
+
+n = size(x);
+
+% Normalize x_sb to be unit variance
+x_std = std(reshape(x,prod(n),1));
+if x_std < 1e-10; x_std = 1e-10; end;
+x = x/x_std;
+
+if( sm_window == 1)
+    
+    M = ceil(n/10);
+    
+    if(max(size(n) >= 3))
+        parzen_w_3 = zeros(2*n(3)-1,1);
+        parzen_w_3(n(3)-M(3):n(3)+M(3)) = parzen_win(2*M(3)+1);
+    end
+    
+    if(max(size(n) >= 2))
+        parzen_w_2 = zeros(2*n(2)-1,1);
+        parzen_w_2(n(2)-M(2):n(2)+M(2)) = parzen_win(2*M(2)+1);
+    end
+    
+    if(max(size(n) >= 1))
+        parzen_w_1 = zeros(2*n(1)-1,1);
+        parzen_w_1(n(1)-M(1):n(1)+M(1)) = parzen_win(2*M(1)+1);
+    end
+    
+end
+
+if max(size(n)) == 2 & min(n) == 1  % 1D
+    xc = xcorr(x,'unbiased');
+    xc = xc.*parzen_w;
+    xf = fftshift(fft(xc));
+    
+elseif max(size(n)) == 2 & min(n) ~= 1  % 2D
+    xc = xcorr2(x); % default option: computes raw correlations with NO
+    % normalization -- Matlab help on xcorr
+    
+    % Bias correction
+    v1 = [1:n(1),n(1)-1:-1:1];
+    v2 = [1:n(2),n(2)-1:-1:1];
+    
+    vd = v1'*v2;
+    xc = xc./vd;
+    parzen_window_2D = parzen_w_1*parzen_w_2';
+    xc = xc.*parzen_window_2D;
+    xf = fftshift(fft2(xc));
+    
+elseif max(size(n)) == 3 & min(n) ~= 1  % 3D
+    xc = zeros(2*n-1);
+    for m3 = 0:n(3)-1
+        temp = zeros(2*n(1:2)-1);
+        for k = 1:n(3)-m3
+            temp = temp + xcorr2(x(:,:,k+m3),x(:,:,k)); % default option:
+            % computes raw correlations with NO normalization
+            % -- Matlab help on xcorr
+        end
+        xc(:,:,n(3)-m3) = temp;
+        xc(:,:,n(3)+m3) = temp;
+    end
+    
+    % Bias correction
+    v1 = [1:n(1),n(1)-1:-1:1];
+    v2 = [1:n(2),n(2)-1:-1:1];
+    v3 = [n(3):-1:1];
+    
+    vd = v1'*v2;
+    vcu = zeros(2*n-1);
+    for m3 = 0:n(3)-1
+        vcu(:,:,n(3)-m3) = vd*v3(m3+1);
+        vcu(:,:,n(3)+m3) = vd*v3(m3+1);
+    end
+    
+    xc = xc./vcu;
+    
+    parzen_window_2D = parzen_w_1*parzen_w_2';
+    for m3 = 0:n(3)-1
+        parzen_window_3D(:,:,n(3)-m3) = parzen_window_2D*parzen_w_3(n(3)-m3);
+        parzen_window_3D(:,:,n(3)+m3) = parzen_window_2D*parzen_w_3(n(3)+m3);
+    end
+    xc = xc.*parzen_window_3D;
+    
+    xf = fftshift(fftn(xc));
+    
+else
+    error('Unrecognized matrix dimension.');
+    
+end
+
+xf = abs(xf);
+xf(xf<1e-4) = 1e-4;
+out = 0.5*log(2*pi*exp(1)) + sumN(log(abs((xf))))/2/sumN(abs(xf));
+
+
+function w = parzen_win(n)
+% PARZENWIN Parzen window.
+%   PARZENWIN(N) returns the N-point Parzen (de la Valle-Poussin) window in
+%   a column vector.
+
+% Check for valid window length (i.e., n < 0)
+[n,w,trivialwin] = checkOrder(n);
+if trivialwin, return, end;
+
+% Index vectors
+k = -(n-1)/2:(n-1)/2;
+k1 = k(k<-(n-1)/4);
+k2 = k(abs(k)<=(n-1)/4);
+
+% Equation 37 of [1]: window defined in three sections
+w1 = 2 * (1-abs(k1)/(n/2)).^3;
+w2 = 1 - 6*(abs(k2)/(n/2)).^2 + 6*(abs(k2)/(n/2)).^3;
+w = [w1 w2 w1(end:-1:1)]';
+
+
+function [n_out, w, trivalwin] = checkOrder(n_in)
+% CHECKORDER Checks the order passed to the window functions.
+
+w = [];
+trivalwin = 0;
+
+% Special case of negative orders:
+if n_in < 0,
+    error('Order cannot be less than zero.');
+end
+
+% Check if order is already an integer or empty
+% If not, round to nearest integer.
+if isempty(n_in) | n_in == floor(n_in),
+    n_out = n_in;
+else
+    n_out = round(n_in);
+    warning('Rounding order to nearest integer.');
+end
+
+% Special cases:
+if isempty(n_out) | n_out == 0,
+    w = zeros(0,1);               % Empty matrix: 0-by-1
+    trivalwin = 1;
+elseif n_out == 1,
+    w = 1;
+    trivalwin = 1;
+end
+
+
+function lam_adj = eigensp_adj(lam,n,p)
+% Eigen spectrum adjustment for EVD on finite samples
+
+r = p/n;
+
+bp = (1+sqrt(r))^2;
+bm = (1-sqrt(r))^2;
+
+vv = [bm:(bp-bm)/(5*p-1):bp];
+
+gv = 1./(2*pi*r*vv).*sqrt((vv-bm).*(bp-vv));
+
+gvd = zeros(size(gv));
+for i = 1:length(gv)
+    gvd(i) = sum(gv(1:i));
+end
+gvd = gvd/max(gvd);
+
+lam_emp = zeros(size(lam));
+for i = 1:p
+    i_norm = i/p;
+    [minv,minx] = min(abs(i_norm-gvd));
+    lam_emp(i) = vv(minx);
+end
+
+lam_emp = rot90(lam_emp, 2);
+
+lam_adj = lam./lam_emp;
+
+
+function [sum_dat] = sumN(dat)
+% sum of the all elements of the dat matrix
+
+sum_dat = sum(dat(:));
+
+
+function c = xcorr2(a,b)
+% two dimensional cross correlation
+
+if nargin == 1
+    b = a;
+end
+
+c = conv2(a, rot90(conj(b),2));
+
+function kurt = kurtn(x)
+% Normalized kurtosis funtion so that for a Gaussian r.v. the kurtn(g) = 0
+% Input: x 1:N vector
+
+kurt = zeros(1, size(x, 2));
+
+for i = 1:size(x, 2)
+    a = detrend(x(:, i), 0);
+    a = a/std(a);
+    kurt(i) = mean(a.^4) - 3;
+end
+
+
+
+function [ER_FM, ER_AR] = orderEstimation_HdsvsEig_R( data1 )
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%ENTROPY-RATE BASED ORDER SELECTION (ER_FM & ER_AR)
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Input:
+% data1:       mixtures X
+% Outputs:
+% ER_FM:       Order estimated by entropy rate based method that assumes signal has finite memory length
+% ER_AR:       Order estimated by entropy rate based method that assumes AR signal
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%
+% References:
+%
+% ER_FM & ER_AR
+% G.-S. Fu, M. Anderson, and T. Adali, Likelihood estimators for dependent samples
+% and their application to order detection, in Signal Process, IEEE Trans.
+% on, vol. 62, no. 16, pp. 4237-4244, Aug. 2014.
+%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Program by Geng-Shen Fu
+% Please contact me at fugengs1@umbc.edu
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+data1 = data1 - mean(data1,2)*ones(1,size(data1,2));    % remove the mean
+[N T]=size(data1);
+
+% estimate the downsampling depth and order detection
+[U1,S1,V1] = svd(data1,'econ');
+clear V1;
+data1 = U1'*data1;
+
+[ACS DSD dsdAll] = Entropy_rate_estimator(data1);
+
+ER_FM_DS=zeros(1,max(DSD,100));
+for i=1:DSD
+    for n=1:N
+        H(n)=Entropy_rate_single_estimator(ACS(n,1:i:DSD));
+    end;
+    if sum(H==Inf)>0
+        continue;
+    end
+    [H index_H] = sort(H, 'descend');
+    ER_FM_DS(i)=order_estimate_entropy(N, T/i, H);
+end
+ER_FM_DS1=ER_FM_DS(ER_FM_DS~=0);
+ER_FM=round(median(ER_FM_DS1));
+
+ER_AR_DS=zeros(1,max(DSD,100));
+for i=1:DSD
+    for n=1:N
+        H(n)=Entropy_rate_single_estimator_AR(data1(n,1:i:end));
+    end;
+    if sum(H==Inf)>0
+        continue;
+    end
+    [H index_H] = sort(H, 'descend');
+    ER_AR_DS(i)=order_estimate_entropy(N, T/i, H);
+end
+ER_AR_DS1=ER_AR_DS(ER_AR_DS~=0);
+ER_AR=round(median(ER_AR_DS1));
+
+maxp0 = floor(2*size(data1,2)/size(data1,1))/2;
+dsd = size(data1,1)/2;    % the initial downsampling depth is half the number of sensors
+existing_orders = [];   % save all estimated orders
+while 1
+    % estimate orders
+    E_JDS = order_estimate_complex_c(size(data1,1), size(data1,2), dsd, diag(S1));
+    existing_orders = [existing_orders; E_JDS];
+    if sum(existing_orders==E_JDS)>=5  % return when the order detection converges
+        dsd = ceil(dsd);
+        break;
+    end
+    
+    % re-estimate downsampling depth
+    dsd = 0;
+    cnt1 = 0;
+    cnt2 = 0;   % counting the MA orders reaching the upper bound
+    ma_order_list=[];
+    for k=1:E_JDS
+        ma_order1=DownSampOrderEstimationR(data1(k,:));
+        ma_order_list=[ma_order_list;ma_order1];
+    end
+    dsd=median(ma_order_list);
+end
+
+function p0 = order_estimate_complex_c( N, T0, dsd, S1 )
+%  order estimation of real-valued multivariate process
+%  N: number of time points
+%  T0: original sample size
+%  dsd: downsampling depth
+%  S1: singular values
+%  p0: estimated order
+% Program by Geng-Shen Fu: fugengs1@umbc.edu
+T = T0/dsd;     % effective sample size
+lambda = S1.^2/T0;   % eigenvalues
+
+DL = zeros(N,1);
+for p = 0 : N-1
+    
+    nf = 1+p*N-0.5*p*(p-1); % number of degrees of freedom
+    DL(p+1) = DL(p+1) + 0.5*nf*log(T)/T;
+    
+    for r = 1 : p
+        DL(p+1) = DL(p+1) + 0.5*log(lambda(r));
+    end
+    
+    sigma2v = sum(lambda(p+1:N))/(N-p);
+    DL(p+1) = DL(p+1) + 0.5*(N-p)*log(sigma2v);
+    
+end
+[dl0, p0] = min(real(DL));
+p0 = p0-1;
+%figure; plot([0:N-1], real(DL),'--o')
+
+
+
+function [ACS dsd ma_order_list] = Entropy_rate_estimator( data )
+N = size(data,1);
+T = size(data, 2);
+ma_order_list=[];
+
+for i = 1:N
+    dsd=DownSampOrderEstimationR(data(i,:));
+    ma_order_list = [ma_order_list;dsd];
+end;
+% ma_order_list=[40 35 30 25 10 10 10 10 10 10]';
+
+dsd=round(mean(ma_order_list));
+dsd_max=max(ma_order_list);
+
+% ma_order_list(ma_order_list<dsd)=dsd;
+% dsd=105;
+for i=1:N
+    data(i,:)=data(i,:)-mean(data(i,:));
+    acsTmp=xcorr(data(i,:),dsd_max-1,'unbiased');
+    ACS(i,:)=acsTmp(dsd_max:end);
+end;
+
+
+function [H1 E1 C1 C2 ma_order_list] = Entropy_estimate_complex( data )
+N = size(data,1);
+T = size(data, 2);
+ma_order_list=[];
+
+for i = 1:N
+    dsd=DownSampOrderEstimationR(data(i,:));
+    ma_order_list = [ma_order_list;dsd];
+end;
+% ma_order_list=[40 35 30 25 10 10 10 10 10 10]';
+
+dsd_max = max(ma_order_list);
+for i = 1:N
+    [H1(i) C1(:,:,i) C2(:,:,i)] = Entropy_single_estimate_complex(data(i,:), ma_order_list(i), dsd_max);
+    E1(i) = var(data(i,1:dsd:T), 1);
+end;
+
+function [H] = Entropy_rate_single_estimator( acs )
+% Program by Geng-Shen Fu: fugengs1@umbc.edu
+dsd=size(acs,2);
+C1=toeplitz(acs);
+if dsd~=1
+    C2=toeplitz(acs(1:end-1));
+else
+    C2=1;
+end;
+acs1=acs(end:-1:2);
+if det(C2)<1e-18
+    H=Inf;
+    return;
+end
+H= acs(1)-acs1*inv(C2)*acs1';
+% H= (abs(det(C1))/abs(det(C2)));
+
+
+function [H] = Entropy_rate_single_estimator_AR( data )
+% Program by Geng-Shen Fu: fugengs1@umbc.edu
+%AR Order Selection with Partial Autocorrelation Sequence
+T=size(data,2);
+[arcoefs,E,K] = aryule(data,15);
+
+AR_Order=find(abs(K)<1.96/sqrt(T),1)-1;
+if prod(size(AR_Order))==0
+    AR_Order=15;
+end
+[arcoefs] = aryule(data,AR_Order);
+Data_White = filter(arcoefs, 1, data);
+H=var(Data_White);
+
+% figure;
+% pacf = -K;
+% lag = 1:15;
+% stem(lag,pacf,'markerfacecolor',[0 0 1]);
+% xlabel('Lag'); ylabel('Partial Autocorrelation');
+% set(gca,'xtick',1:1:15)
+% lconf = -1.96/sqrt(T)*ones(length(lag),1);
+% uconf = 1.96/sqrt(T)*ones(length(lag),1);
+% hold on;
+% line(lag,lconf,'color',[1 0 0]);
+% line(lag,uconf,'color',[1 0 0]);
+% i=1;
+
+
+
+function [H1 C1 C2] = Entropy_single_estimate_complex( data, dsd, dsd_max )
+% Program by Geng-Shen Fu: fugengs1@umbc.edu
+data=data-mean(data);
+acs=xcorr(data,dsd-1,'unbiased');
+C1=toeplitz(acs(dsd:end));
+if dsd~=1
+    C2=toeplitz(acs(dsd:end-1));
+else
+    C2=1;
+end;
+H1= (abs(det(C1))/abs(det(C2)));
+
+acs=xcorr(data,dsd_max-1,'unbiased');
+C1=toeplitz(acs(dsd_max:end));
+if dsd_max~=1
+    C2=toeplitz(acs(dsd_max:end-1));
+else
+    C2=1;
+end;
+
+function [p0] = order_estimate_entropy( N, T0, H1 )
+%  order estimation of real-valued multivariate process
+%  N: number of time points
+%  T0: original sample size
+%  dsd: downsampling depth
+%  S1: singular values
+%  p0: estimated order
+% Program by Geng-Shen Fu: fugengs1@umbc.edu
+% T = T0/dsd;     % effective sample size
+T = T0;     % effective sample size
+lambda = H1;   % eigenvalues
+
+DL = zeros(N,1);
+for p = 0 : N-1
+    nf = 1+p*N-0.5*p*(p-1); % number of degrees of freedom
+    
+    for r = 1 : p
+        DL(p+1) = DL(p+1) + 0.5*log(lambda(r));
+    end
+    
+    sigma2v = sum(lambda(p+1:N))/(N-p);
+    DL(p+1) = DL(p+1) + 0.5*(N-p)*log(sigma2v);
+    
+    DL(p+1) = DL(p+1) + 0.5*nf*log(T)/T;
+end
+[dl0, p0] = min(real(DL));
+p0 = p0-1;
+% DL'
+%figure; plot([0:N-1], real(DL),'--o')
+
+function [p0] = order_estimate_covariance( N, T, C1, C2, dsd )
+%  order estimation of real-valued multivariate process
+%  N: number of time points
+%  T0: original sample size
+%  dsd: downsampling depth
+%  S1: singular values
+%  p0: estimated order
+% Program by Geng-Shen Fu: fugengs1@umbc.edu
+
+DL = zeros(N,1);
+for p = 0 : N-1
+    nf = p*(2*N-p-1)/2+(p+1)*dsd; % number of degrees of freedom
+    
+    for r = 1 : p
+        DL(p+1) = DL(p+1) + 0.5*log((abs(det(C1(:,:,r)))/abs(det(C2(:,:,r)))));
+    end
+    
+    C1Bar = sum(C1(:,:,p+1:N),3)/(N-p);
+    C2Bar = sum(C2(:,:,p+1:N),3)/(N-p);
+    DL(p+1) = DL(p+1) + 0.5*(N-p)*log((abs(det(C1Bar))/abs(det(C2Bar))));
+    
+    DL(p+1) = DL(p+1) + 0.5*nf*log(T)/T;
+end
+[dl0, p0] = min(real(DL));
+p0 = p0-1;
+% DL'
+%figure; plot([0:N-1], real(DL),'--o')
+
+
+%For debug
+function DisplayACS(data)
+lag=round(prod(size(data))/20);
+acs=abs(xcorr(data, lag, 'unbiased'));
+figure;
+plot(1:lag+1,acs(lag+1:end));
+
+
+
+
+function p0 = DownSampOrderEstimationR(data)
+% return the estimated order of moving average process x
+x = data - mean(data);
+x = x/std(x);
+p0 = 1;
+
+while 1
+    if isWhiteR( x(ceil(rand*p0):p0:end) ) % ceil(rand*p0) is a random offset
+        return;
+    else
+        p0=p0+1;
+        if p0>inf
+            p0=inf;
+            return;
+        end
+    end
+end
+
+
+function s = isWhiteR(x)
+% hypothesis: white process vs colored process
+% if accept white, return 1; otherwise, return 0
+% Program by Xi-Lin Li: lixilin@umbc.edu
+T=length(x);
+
+% DL of white case
+p=0;
+dl0=log(mean(abs(x).^2));
+
+% DL of colored case with AR order 1
+p=1;
+x1 = [0,x(1:end-1)];
+a = x*x1'/(x1*x1');
+n = x-a*x1;
+dl1 = log(mean(abs(n).^2)) + p*log(T)/T/2;
+
+s=(dl0<dl1);

From 05c85ef48fe949b33237958cd29cfab88d3c269d Mon Sep 17 00:00:00 2001
From: leandrolecca <lean.lecca.96@icloud.com>
Date: Mon, 28 Nov 2022 18:41:50 +0100
Subject: [PATCH 04/12] added gift-execution

---
 comparison.py | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/comparison.py b/comparison.py
index 8d62974..9b7eeb9 100644
--- a/comparison.py
+++ b/comparison.py
@@ -14,6 +14,7 @@
 wdir = "/Users/eurunuela/Downloads"
 
 repo = os.path.join(wdir, repoid)
+gift = "/bcbl/home/home_g-m/llecca/Scripts/gift"
 
 subprocess.run(
     f"cd {wdir} && datalad install git@github.com:OpenNeuroDatasets/{repoid}.git", shell=True
@@ -104,6 +105,16 @@
 
         # Here run matlab script with subprocess.run
         print("Running GIFT version of maPCA")
+ 
+        cmd = f'matlab -nodesktop -nosplash -nojvm -logfile {sbj_dir}/giftoutput.txt -r "try;addpath(genpath(\'{gift}\'));sprintf(\'Subject dir: %s\',\'{datadir}\');[comp_est_AIC,comp_est_KIC,comp_est_MDL,mdl,aic,kic]=icatb_estimate_dimension(\'{tedana_optcom_mat}\',\'{tedana_mask_mat}\',\'double\',3);save(\'{sbj_dir}/gift.mat\',\'comp_est_AIC\',\'comp_est_KIC\',\'comp_est_MDL\');end;quit"'
+        
+        giftmat=scipy_io.loadmat(os.path.join(sbj_dir,'gift.mat'))
+
+        proc=subprocess.Popen(cmd,shell=True,stdin=subprocess.PIPE,
+                              stdout=subprocess.PIPE,
+                              stderr=subprocess.PIPE)
+        output, err = proc.communicate()
+        print(output.decode('utf-8'))
 
         # Append AIC, KIC and MDL values to a pandas dataframe
         print("Appending AIC, KIC and MDL values to a pandas dataframe")
@@ -111,11 +122,11 @@
             {
                 "Subject": sbj,
                 "maPCA_AIC": aic,
-                "GIFT_AIC": 0,
+                "GIFT_AIC": giftmat['comp_est_AIC'][0][0],
                 "maPCA_KIC": kic,
-                "GIFT_KIC": 0,
+                "GIFT_KIC": giftmat['comp_est_KIC'][0][0],
                 "maPCA_MDL": mdl,
-                "GIFT_MDL": 0,
+                "GIFT_MDL": giftmat['compt_est_MDL'][0][0],
             },
             ignore_index=True,
         )

From 936a715782ead36c10e8c23aff65496b6dcfaf55 Mon Sep 17 00:00:00 2001
From: leandrolecca <lean.lecca.96@icloud.com>
Date: Mon, 28 Nov 2022 18:50:53 +0100
Subject: [PATCH 05/12] read gift.mat after

---
 comparison.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/comparison.py b/comparison.py
index 9b7eeb9..41a39dd 100644
--- a/comparison.py
+++ b/comparison.py
@@ -108,14 +108,14 @@
  
         cmd = f'matlab -nodesktop -nosplash -nojvm -logfile {sbj_dir}/giftoutput.txt -r "try;addpath(genpath(\'{gift}\'));sprintf(\'Subject dir: %s\',\'{datadir}\');[comp_est_AIC,comp_est_KIC,comp_est_MDL,mdl,aic,kic]=icatb_estimate_dimension(\'{tedana_optcom_mat}\',\'{tedana_mask_mat}\',\'double\',3);save(\'{sbj_dir}/gift.mat\',\'comp_est_AIC\',\'comp_est_KIC\',\'comp_est_MDL\');end;quit"'
         
-        giftmat=scipy_io.loadmat(os.path.join(sbj_dir,'gift.mat'))
-
         proc=subprocess.Popen(cmd,shell=True,stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
         output, err = proc.communicate()
         print(output.decode('utf-8'))
-
+        
+        giftmat=scipy_io.loadmat(os.path.join(sbj_dir,'gift.mat'))
+        
         # Append AIC, KIC and MDL values to a pandas dataframe
         print("Appending AIC, KIC and MDL values to a pandas dataframe")
         results_df = results_df.append(

From 67749e3404789208c0b05ca349e75efe1213b2e9 Mon Sep 17 00:00:00 2001
From: leandrolecca <lean.lecca.96@icloud.com>
Date: Mon, 28 Nov 2022 20:29:33 +0100
Subject: [PATCH 06/12] edit

---
 comparison.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/comparison.py b/comparison.py
index 41a39dd..8f760f1 100644
--- a/comparison.py
+++ b/comparison.py
@@ -106,7 +106,7 @@
         # Here run matlab script with subprocess.run
         print("Running GIFT version of maPCA")
  
-        cmd = f'matlab -nodesktop -nosplash -nojvm -logfile {sbj_dir}/giftoutput.txt -r "try;addpath(genpath(\'{gift}\'));sprintf(\'Subject dir: %s\',\'{datadir}\');[comp_est_AIC,comp_est_KIC,comp_est_MDL,mdl,aic,kic]=icatb_estimate_dimension(\'{tedana_optcom_mat}\',\'{tedana_mask_mat}\',\'double\',3);save(\'{sbj_dir}/gift.mat\',\'comp_est_AIC\',\'comp_est_KIC\',\'comp_est_MDL\');end;quit"'
+        cmd = f'matlab -nodesktop -nosplash -nojvm -logfile {sbj_dir}/giftoutput.txt -r "try;addpath(genpath(\'{gift}\'));[comp_est_AIC,comp_est_KIC,comp_est_MDL,mdl,aic,kic]=icatb_estimate_dimension(\'{tedana_optcom_mat}\',\'{tedana_mask_mat}\',\'double\',3);save(\'{sbj_dir}/gift.mat\',\'comp_est_AIC\',\'comp_est_KIC\',\'comp_est_MDL\');end;quit"'
         
         proc=subprocess.Popen(cmd,shell=True,stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE,

From 9e9367ee97faa0a189f9fcb1ef5bc8ed74a812e4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eneko=20Uru=C3=B1uela?= <e.urunuela@icloud.com>
Date: Wed, 30 Nov 2022 09:49:50 +0100
Subject: [PATCH 07/12] Apply normalization in space like GIFT

---
 mapca/mapca.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mapca/mapca.py b/mapca/mapca.py
index 3de0b3e..65e0132 100644
--- a/mapca/mapca.py
+++ b/mapca/mapca.py
@@ -152,7 +152,7 @@ def _fit(self, img, mask):
         self.scaler_ = StandardScaler(with_mean=True, with_std=True)
         if self.normalize:
             # TODO: determine if tedana is already normalizing before this
-            X = self.scaler_.fit_transform(X.T).T  # This was X_sc
+            X = self.scaler_.fit_transform(X)  # This was X_sc
             # X = ((X.T - X.T.mean(axis=0)) / X.T.std(axis=0)).T
 
         LGR.info("Performing SVD on original data...")
@@ -228,7 +228,7 @@ def _fit(self, img, mask):
 
             # Perform Variance Normalization
             temp_scaler = StandardScaler(with_mean=True, with_std=True)
-            dat = temp_scaler.fit_transform(dat.T).T
+            dat = temp_scaler.fit_transform(dat)
 
             # (completed)
             LGR.info("Performing SVD on subsampled i.i.d. data...")

From 6213c4edcd8dc1e1084652150e00a26df72da9ca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eneko=20Uru=C3=B1uela?= <e.urunuela@icloud.com>
Date: Wed, 30 Nov 2022 09:50:00 +0100
Subject: [PATCH 08/12] Renamed comparison file

---
 comparison.py => mapca_on_openneuro.py | 28 +++++++++++++-------------
 1 file changed, 14 insertions(+), 14 deletions(-)
 rename comparison.py => mapca_on_openneuro.py (82%)

diff --git a/comparison.py b/mapca_on_openneuro.py
similarity index 82%
rename from comparison.py
rename to mapca_on_openneuro.py
index 8f760f1..7e23374 100644
--- a/comparison.py
+++ b/mapca_on_openneuro.py
@@ -105,28 +105,28 @@
 
         # Here run matlab script with subprocess.run
         print("Running GIFT version of maPCA")
- 
-        cmd = f'matlab -nodesktop -nosplash -nojvm -logfile {sbj_dir}/giftoutput.txt -r "try;addpath(genpath(\'{gift}\'));[comp_est_AIC,comp_est_KIC,comp_est_MDL,mdl,aic,kic]=icatb_estimate_dimension(\'{tedana_optcom_mat}\',\'{tedana_mask_mat}\',\'double\',3);save(\'{sbj_dir}/gift.mat\',\'comp_est_AIC\',\'comp_est_KIC\',\'comp_est_MDL\');end;quit"'
-        
-        proc=subprocess.Popen(cmd,shell=True,stdin=subprocess.PIPE,
-                              stdout=subprocess.PIPE,
-                              stderr=subprocess.PIPE)
+
+        cmd = f"matlab -nodesktop -nosplash -nojvm -logfile {sbj_dir}/giftoutput.txt -r \"try;addpath(genpath('{gift}'));[comp_est_AIC,comp_est_KIC,comp_est_MDL,mdl,aic,kic]=icatb_estimate_dimension('{tedana_optcom_mat}','{tedana_mask_mat}','double',3);save('{sbj_dir}/gift.mat','comp_est_AIC','comp_est_KIC','comp_est_MDL');end;quit\""
+
+        proc = subprocess.Popen(
+            cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE
+        )
         output, err = proc.communicate()
-        print(output.decode('utf-8'))
-        
-        giftmat=scipy_io.loadmat(os.path.join(sbj_dir,'gift.mat'))
-        
+        print(output.decode("utf-8"))
+
+        giftmat = scipy_io.loadmat(os.path.join(sbj_dir, "gift.mat"))
+
         # Append AIC, KIC and MDL values to a pandas dataframe
         print("Appending AIC, KIC and MDL values to a pandas dataframe")
         results_df = results_df.append(
             {
                 "Subject": sbj,
                 "maPCA_AIC": aic,
-                "GIFT_AIC": giftmat['comp_est_AIC'][0][0],
+                "GIFT_AIC": giftmat["comp_est_AIC"][0][0],
                 "maPCA_KIC": kic,
-                "GIFT_KIC": giftmat['comp_est_KIC'][0][0],
+                "GIFT_KIC": giftmat["comp_est_KIC"][0][0],
                 "maPCA_MDL": mdl,
-                "GIFT_MDL": giftmat['compt_est_MDL'][0][0],
+                "GIFT_MDL": giftmat["compt_est_MDL"][0][0],
             },
             ignore_index=True,
         )
@@ -134,4 +134,4 @@
         print("Subject", sbj, "done")
 
 # Save pandas dataframe to csv file
-results_df.to_csv(os.path.join(wdir, "results.csv"), index=False)
+results_df.to_csv(os.path.join(wdir, "results_python.csv"), index=False)

From b5207845caba1f86322a24cea3fdb0f130cb8f59 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eneko=20Uru=C3=B1uela?= <e.urunuela@icloud.com>
Date: Wed, 30 Nov 2022 09:50:52 +0100
Subject: [PATCH 09/12] Make tedana mask binary

---
 mapca_on_openneuro.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/mapca_on_openneuro.py b/mapca_on_openneuro.py
index 7e23374..8f6b67e 100644
--- a/mapca_on_openneuro.py
+++ b/mapca_on_openneuro.py
@@ -81,6 +81,9 @@
         tedana_optcom_img = nib.load(tedana_optcom)
         tedana_mask_img = nib.load(tedana_mask)
 
+        # Make mask binary
+        tedana_mask_img = nib.Nifti1Image(tedana_mask_img.get_fdata() > 0, tedana_mask_img.affine)
+
         # Save tedana optimally combined data and mask into mat files
         tedana_optcom_mat = os.path.join(sbj_dir, "optcom_bold.mat")
         tedana_mask_mat = os.path.join(sbj_dir, "mask.mat")

From 0d18f7257f22ee361563ea97a2fd61dea6cc9e8c Mon Sep 17 00:00:00 2001
From: leandrolecca <lean.lecca.96@icloud.com>
Date: Wed, 30 Nov 2022 13:57:54 +0100
Subject: [PATCH 10/12] edits for matlab2014

---
 icatb_estimate_dimension.m | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/icatb_estimate_dimension.m b/icatb_estimate_dimension.m
index a206609..fb982e3 100644
--- a/icatb_estimate_dimension.m
+++ b/icatb_estimate_dimension.m
@@ -99,9 +99,9 @@
     end
 end
 
-if (ischar(maskvec) && endsWith(maskvec,'.mat'))
+if (ischar(maskvec) && strcmp(maskvec(end-3:end), '.mat'))
     mask = load(maskvec);
-    maskvec = mask.data;
+    maskvec = mask.mask;
 else
     % If mask is supplied as a file, read data and convert data to
     % logical
@@ -114,7 +114,7 @@
 end
 
 %% Get dimensions of data by loading first time point
-if (ischar(files) && ~endsWith(files,'.mat'))
+if (ischar(files) && ~strcmp(files(end-3:end), '.mat'))
     files = icatb_rename_4d_file(files);
     first_file = deblank(files(1, :));
     data = icatb_loadData(first_file);
@@ -124,7 +124,7 @@
         tdim = size(data, 4);
     end
 else
-    if (ischar(files) && endsWith(files,'.mat'))
+    if (ischar(files) && strcmp(files(end-3:end), '.mat'))
         data = load(files);
         data = data.data;
     else
@@ -175,7 +175,7 @@
 end
 
 %% Load data by reading timepoint at a time
-if (ischar(files) && ~endsWith(files,'.mat'))
+if (ischar(files) && ~strcmp(files(end-3:end), '.mat'))
     data = icatb_read_data(files, [], find(maskvec == 1), precisionType);
 else
     if (strcmpi(precisionType, 'single'))

From e52cc561bb764aa7caba7a3ab26393e4ffe768a9 Mon Sep 17 00:00:00 2001
From: leandrolecca <lean.lecca.96@icloud.com>
Date: Wed, 30 Nov 2022 14:03:28 +0100
Subject: [PATCH 11/12] mask for pca and gift execution

---
 mapca_on_openneuro.py | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/mapca_on_openneuro.py b/mapca_on_openneuro.py
index 8f6b67e..be1bf29 100644
--- a/mapca_on_openneuro.py
+++ b/mapca_on_openneuro.py
@@ -11,14 +11,17 @@
 from mapca import MovingAveragePCA
 
 repoid = "ds000258"
-wdir = "/Users/eurunuela/Downloads"
+wdir = "/bcbl/home/home_g-m/llecca/Scripts"
 
 repo = os.path.join(wdir, repoid)
 gift = "/bcbl/home/home_g-m/llecca/Scripts/gift"
 
-subprocess.run(
-    f"cd {wdir} && datalad install git@github.com:OpenNeuroDatasets/{repoid}.git", shell=True
-)
+if os.path.exists(os.path.join(wdir,repoid)):
+    print('Repo exists')
+else:
+    subprocess.run(
+        f"cd {wdir} && datalad install https://github.com/OpenNeuroDatasets/{repoid}.git", shell=True
+    )
 
 subjects = []
 
@@ -82,7 +85,9 @@
         tedana_mask_img = nib.load(tedana_mask)
 
         # Make mask binary
-        tedana_mask_img = nib.Nifti1Image(tedana_mask_img.get_fdata() > 0, tedana_mask_img.affine)
+        mask_array = tedana_mask_img.get_fdata()
+        mask_array[mask_array > 0] = 1
+        tedana_mask_img = nib.Nifti1Image(mask_array, tedana_mask_img.affine)
 
         # Save tedana optimally combined data and mask into mat files
         tedana_optcom_mat = os.path.join(sbj_dir, "optcom_bold.mat")
@@ -103,13 +108,13 @@
 
         # Remove tedana output directory and the anat and func directories
         subprocess.run(f"rm -rf {tedana_output_dir}", shell=True, cwd=repo)
-        subprocess.run(f"rm -rf {sbj}/anat", shell=True, cwd=repo)
-        subprocess.run(f"rm -rf {sbj}/func", shell=True, cwd=repo)
+        subprocess.run(f"datalad drop {sbj}/anat", shell=True, cwd=repo)
+        subprocess.run(f"datalad drop {sbj}/func", shell=True, cwd=repo)
 
         # Here run matlab script with subprocess.run
         print("Running GIFT version of maPCA")
 
-        cmd = f"matlab -nodesktop -nosplash -nojvm -logfile {sbj_dir}/giftoutput.txt -r \"try;addpath(genpath('{gift}'));[comp_est_AIC,comp_est_KIC,comp_est_MDL,mdl,aic,kic]=icatb_estimate_dimension('{tedana_optcom_mat}','{tedana_mask_mat}','double',3);save('{sbj_dir}/gift.mat','comp_est_AIC','comp_est_KIC','comp_est_MDL');end;quit\""
+        cmd = f'matlab -nodesktop -nosplash -nojvm -logfile {sbj_dir}/giftoutput.txt -r "addpath(genpath(\'{gift}\'));[comp_est_AIC,comp_est_KIC,comp_est_MDL,mdl,aic,kic]=icatb_estimate_dimension(\'{tedana_optcom_mat}\',\'{tedana_mask_mat}\',\'double\',3);save(\'{sbj_dir}/gift.mat\',\'comp_est_AIC\',\'comp_est_KIC\',\'comp_est_MDL\');quit"'
 
         proc = subprocess.Popen(
             cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE
@@ -129,7 +134,7 @@
                 "maPCA_KIC": kic,
                 "GIFT_KIC": giftmat["comp_est_KIC"][0][0],
                 "maPCA_MDL": mdl,
-                "GIFT_MDL": giftmat["compt_est_MDL"][0][0],
+                "GIFT_MDL": giftmat["comp_est_MDL"][0][0],
             },
             ignore_index=True,
         )

From c9925cbdc20cacedec0b11fd7c251c32b9fc9124 Mon Sep 17 00:00:00 2001
From: leandrolecca <lean.lecca.96@icloud.com>
Date: Sat, 3 Dec 2022 21:03:25 +0100
Subject: [PATCH 12/12] online csv update

---
 mapca_on_openneuro.py | 233 +++++++++++++++++++++---------------------
 1 file changed, 116 insertions(+), 117 deletions(-)

diff --git a/mapca_on_openneuro.py b/mapca_on_openneuro.py
index be1bf29..65a06b7 100644
--- a/mapca_on_openneuro.py
+++ b/mapca_on_openneuro.py
@@ -1,5 +1,6 @@
 import json
 import os
+import csv
 import subprocess
 
 import nibabel as nib
@@ -25,121 +26,119 @@
 
 subjects = []
 
-# Create pandas dataframe to store results with the following columns:
+# Create .csv to store results with the following columns:
 # Subject, maPCA_AIC, GIFT_AIC, maPCA_KIC, GIFT_KIC, maPCA_MDL, GIFT_MDL
-results_df = pd.DataFrame(
-    columns=["Subject", "maPCA_AIC", "GIFT_AIC", "maPCA_KIC", "GIFT_KIC", "maPCA_MDL", "GIFT_MDL"]
-)
-
-
-for sbj in os.listdir(repo):
-    sbj_dir = os.path.join(repo, sbj)
-
-    # Access subject directory
-    if os.path.isdir(sbj_dir) and "sub-" in sbj_dir:
-        echo_times = []
-        func_files = []
-
-        subjects.append(os.path.basename(sbj_dir))
-
-        print("Downloading subject", sbj)
-
-        subprocess.run(f"datalad get {sbj}/func", shell=True, cwd=repo)
-
-        print("Searching for functional files and echo times")
-
-        # Get functional filenames and echo times
-        for func_file in os.listdir(os.path.join(repo, sbj, "func")):
-            if func_file.endswith(".json"):
-                with open(os.path.join(repo, sbj, "func", func_file)) as f:
-                    data = json.load(f)
-                    echo_times.append(data["EchoTime"])
-            elif func_file.endswith(".nii.gz"):
-                func_files.append(os.path.join(repo, sbj, "func", func_file))
-
-        # Sort echo_times values from lowest to highest and multiply by 1000
-        echo_times = np.array(sorted(echo_times)) * 1000
-
-        # Sort func_files
-        func_files = sorted(func_files)
-
-        # Tedana output directory
-        tedana_output_dir = os.path.join(sbj_dir, "tedana")
-
-        print("Running tedana")
-
-        # Run tedana
-        tedana_cli.tedana_workflow(
-            data=func_files,
-            tes=echo_times,
-            out_dir=tedana_output_dir,
-            tedpca="mdl",
-        )
-
-        # Find tedana optimally combined data and mask
-        tedana_optcom = os.path.join(tedana_output_dir, "desc-optcom_bold.nii.gz")
-        tedana_mask = os.path.join(tedana_output_dir, "desc-adaptiveGoodSignal_mask.nii.gz")
-
-        # Read tedana optimally combined data and mask
-        tedana_optcom_img = nib.load(tedana_optcom)
-        tedana_mask_img = nib.load(tedana_mask)
-
-        # Make mask binary
-        mask_array = tedana_mask_img.get_fdata()
-        mask_array[mask_array > 0] = 1
-        tedana_mask_img = nib.Nifti1Image(mask_array, tedana_mask_img.affine)
-
-        # Save tedana optimally combined data and mask into mat files
-        tedana_optcom_mat = os.path.join(sbj_dir, "optcom_bold.mat")
-        tedana_mask_mat = os.path.join(sbj_dir, "mask.mat")
-        print("Saving tedana optimally combined data and mask into mat files")
-        scipy_io.savemat(tedana_optcom_mat, {"data": tedana_optcom_img.get_fdata()})
-        scipy_io.savemat(tedana_mask_mat, {"mask": tedana_mask_img.get_fdata()})
-
-        # Run mapca
-        print("Running mapca")
-        pca = MovingAveragePCA(normalize=True)
-        _ = pca.fit_transform(tedana_optcom_img, tedana_mask_img)
-
-        # Get AIC, KIC and MDL values
-        aic = pca.aic_
-        kic = pca.kic_
-        mdl = pca.mdl_
-
-        # Remove tedana output directory and the anat and func directories
-        subprocess.run(f"rm -rf {tedana_output_dir}", shell=True, cwd=repo)
-        subprocess.run(f"datalad drop {sbj}/anat", shell=True, cwd=repo)
-        subprocess.run(f"datalad drop {sbj}/func", shell=True, cwd=repo)
-
-        # Here run matlab script with subprocess.run
-        print("Running GIFT version of maPCA")
-
-        cmd = f'matlab -nodesktop -nosplash -nojvm -logfile {sbj_dir}/giftoutput.txt -r "addpath(genpath(\'{gift}\'));[comp_est_AIC,comp_est_KIC,comp_est_MDL,mdl,aic,kic]=icatb_estimate_dimension(\'{tedana_optcom_mat}\',\'{tedana_mask_mat}\',\'double\',3);save(\'{sbj_dir}/gift.mat\',\'comp_est_AIC\',\'comp_est_KIC\',\'comp_est_MDL\');quit"'
-
-        proc = subprocess.Popen(
-            cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE
-        )
-        output, err = proc.communicate()
-        print(output.decode("utf-8"))
-
-        giftmat = scipy_io.loadmat(os.path.join(sbj_dir, "gift.mat"))
-
-        # Append AIC, KIC and MDL values to a pandas dataframe
-        print("Appending AIC, KIC and MDL values to a pandas dataframe")
-        results_df = results_df.append(
-            {
-                "Subject": sbj,
-                "maPCA_AIC": aic,
-                "GIFT_AIC": giftmat["comp_est_AIC"][0][0],
-                "maPCA_KIC": kic,
-                "GIFT_KIC": giftmat["comp_est_KIC"][0][0],
-                "maPCA_MDL": mdl,
-                "GIFT_MDL": giftmat["comp_est_MDL"][0][0],
-            },
-            ignore_index=True,
-        )
-
-        print("Subject", sbj, "done")
-
-# Save pandas dataframe to csv file
-results_df.to_csv(os.path.join(wdir, "results_python.csv"), index=False)
+with open(os.path.join(wdir,"mapca_gift_openneuro.csv"),"a") as csv_file:
+    writer = csv.writer(csv_file,delimiter="\t")
+    writer.writerow(["Subject", "maPCA_AIC", "GIFT_AIC", "maPCA_KIC",
+                     "GIFT_KIC", "maPCA_MDL","GIFT_MDL"])
+
+    for sbj in os.listdir(repo):
+        sbj_dir = os.path.join(repo, sbj)
+
+        # Access subject directory
+        if os.path.isdir(sbj_dir) and "sub-" in sbj_dir:
+            echo_times = []
+            func_files = []
+
+            subjects.append(os.path.basename(sbj_dir))
+
+            print("Downloading subject", sbj)
+
+            subprocess.run(f"datalad get {sbj}/func", shell=True, cwd=repo)
+
+            print("Searching for functional files and echo times")
+
+            # Get functional filenames and echo times
+            for func_file in os.listdir(os.path.join(repo, sbj, "func")):
+                if func_file.endswith(".json"):
+                    with open(os.path.join(repo, sbj, "func", func_file)) as f:
+                        data = json.load(f)
+                        echo_times.append(data["EchoTime"])
+                elif func_file.endswith(".nii.gz"):
+                    func_files.append(os.path.join(repo, sbj, "func", func_file))
+
+            # Sort echo_times values from lowest to highest and multiply by 1000
+            echo_times = np.array(sorted(echo_times)) * 1000
+
+            # Sort func_files
+            func_files = sorted(func_files)
+
+            # Tedana output directory
+            tedana_output_dir = os.path.join(sbj_dir, "tedana")
+
+            print("Running tedana")
+
+            # Run tedana
+            try:
+                tedana_cli.tedana_workflow(
+                    data=func_files,
+                    tes=echo_times,
+                    out_dir=tedana_output_dir,
+                    tedpca="mdl",
+                )
+            except:
+                print("Something went wrong in "+sbj_dir+", check .nii.gz")
+                continue
+
+            # Find tedana optimally combined data and mask
+            tedana_optcom = os.path.join(tedana_output_dir, "desc-optcom_bold.nii.gz")
+            tedana_mask = os.path.join(tedana_output_dir, "desc-adaptiveGoodSignal_mask.nii.gz")
+
+            # Read tedana optimally combined data and mask
+            tedana_optcom_img = nib.load(tedana_optcom)
+            tedana_mask_img = nib.load(tedana_mask)
+
+            # Make mask binary
+            mask_array = tedana_mask_img.get_fdata()
+            mask_array[mask_array > 0] = 1
+            tedana_mask_img = nib.Nifti1Image(mask_array, tedana_mask_img.affine)
+
+            # Save tedana optimally combined data and mask into mat files
+            tedana_optcom_mat = os.path.join(sbj_dir, "optcom_bold.mat")
+            tedana_mask_mat = os.path.join(sbj_dir, "mask.mat")
+            print("Saving tedana optimally combined data and mask into mat files")
+            scipy_io.savemat(tedana_optcom_mat, {"data": tedana_optcom_img.get_fdata()})
+            scipy_io.savemat(tedana_mask_mat, {"mask": tedana_mask_img.get_fdata()})
+
+            # Run mapca
+            print("Running mapca")
+            pca = MovingAveragePCA(normalize=True)
+            _ = pca.fit_transform(tedana_optcom_img, tedana_mask_img)
+
+            # Get AIC, KIC and MDL values
+            aic = pca.aic_
+            kic = pca.kic_
+            mdl = pca.mdl_
+
+            # Remove tedana output directory and the anat and func directories
+            subprocess.run(f"rm -rf {tedana_output_dir}", shell=True, cwd=repo)
+            subprocess.run(f"datalad drop {sbj}/anat", shell=True, cwd=repo)
+            subprocess.run(f"datalad drop {sbj}/func", shell=True, cwd=repo)
+
+            # Here run matlab script with subprocess.run
+            print("Running GIFT version of maPCA")
+
+            cmd = f'matlab -nodesktop -nosplash -nojvm -logfile {sbj_dir}/giftoutput.txt -r "addpath(genpath(\'{gift}\'));[comp_est_AIC,comp_est_KIC,comp_est_MDL,mdl,aic,kic]=icatb_estimate_dimension(\'{tedana_optcom_mat}\',\'{tedana_mask_mat}\',\'double\',3);save(\'{sbj_dir}/gift.mat\',\'comp_est_AIC\',\'comp_est_KIC\',\'comp_est_MDL\');quit"'
+
+            proc = subprocess.Popen(
+                cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE
+            )
+            output, err = proc.communicate()
+            print(output.decode("utf-8"))
+
+            giftmat = scipy_io.loadmat(os.path.join(sbj_dir, "gift.mat"))
+
+            # Append AIC, KIC and MDL values to a pandas dataframe
+            print("Appending AIC, KIC and MDL values to csv file")
+            writer.writerow([sbj,
+                             aic["n_components"], 
+                             giftmat["comp_est_AIC"][0][0], 
+                             kic["n_components"],
+                             giftmat["comp_est_KIC"][0][0],
+                             mdl["n_components"],
+                             giftmat["comp_est_MDL"][0][0]])
+            csv_file.flush()
+            print("Subject", sbj, "done")
+
+    csv_file.close()