Upload run_incrementalpca.py
AlternativeModels/IncrementalPCA/run_incrementalpca.py
ADDED
@@ -0,0 +1,97 @@
+# https://stackoverflow.com/questions/31428581/incremental-pca-on-big-data
+import h5py
+import numpy as np
+from sklearn.decomposition import IncrementalPCA
+from scipy.io import loadmat
+import pickle
+
+inverse_transformation_path = "./mask/"
+filestr = "washu120_subsample1_randperm0_timeseries"
+n_samples = 27255  # number of time points to use; set to None to use all the data
+# Construct the path to the left inverse transformation matrix file and load it
+left_file_path = inverse_transformation_path + "Left_fMRI2Grid_192_by_192_NN.mat"
+left_data = loadmat(left_file_path)
+Left_inverse_transformation = left_data["inverse_transformation"]
+
+# Construct the path to the right inverse transformation matrix file and load it
+right_file_path = inverse_transformation_path + "Right_fMRI2Grid_192_by_192_NN.mat"
+right_data = loadmat(right_file_path)
+Right_inverse_transformation = right_data["inverse_transformation"]
+
+# h5filename = '/mnt/leuthardte/Active/Cindy/Data/BCP_Jan2023_10moTo20mogroupavg.h5'
+# h5filename = '/mnt/leuthardte/Active/Cindy/Data/120_allsubs_corr.dconn.h5'
+h5filename = (
+    "/mnt/leuthardte/Active/Cindy/Data/" + filestr + ".h5"
+)
+
+# some parameters
+batch_size = 5451  # Adjust based on your system's memory capacity
+
+n_components = 100
+ipca = IncrementalPCA(n_components=n_components)
+
+if not n_samples:
+    # Get the sample size from the HDF5 file
+    with h5py.File(h5filename, "r") as file:
+        n_samples = file["/LeftData"].shape[0]
+        print(n_samples)
+
+# abort if the sample size is not divisible by the batch size
+print(n_samples)
+assert n_samples % batch_size == 0, "batch size not a factor of sample size"
+
+for start_idx in range(0, n_samples, batch_size):
+    end_idx = start_idx + batch_size
+
+    print(start_idx)
+
+    # Load one batch of data from the HDF5 file
+    with h5py.File(h5filename, "r") as file:
+        reconL = file["/LeftData"][start_idx:end_idx, :, :, :]
+        reconR = file["/RightData"][start_idx:end_idx, :, :, :]
+
+    # Perform matrix and tensor manipulations
+    # Transpose and reshape 'reconL' and 'reconR' as in the MATLAB permute and reshape (the transpose here is the identity permutation)
+    corticalrecon_L = (
+        Left_inverse_transformation
+        @ reconL.transpose(0, 1, 2, 3).reshape(batch_size, -1).T
+    )
+    corticalrecon_R = (
+        Right_inverse_transformation
+        @ reconR.transpose(0, 1, 2, 3).reshape(batch_size, -1).T
+    )
+
+    # Concatenate the left and right reconstructions
+    recon_dtseries = np.vstack((corticalrecon_L, corticalrecon_R))
+    # print(recon_dtseries.shape)
+
+    # recon_dtseries[recon_dtseries == 0] = 1  # 2024/11/18: commented out for the timeseries data
+    # 2024/06/08: make the diagonals equal to 1 instead of 0
+
+    ipca.partial_fit(recon_dtseries.T)
+
+principal_components = ipca.components_
+
+# Save the trained model
+with open(
+    "./IncrementalPCA/pca_model_"
+    + filestr
+    + "_zdim"
+    + str(n_components)
+    + ".pkl",
+    "wb",
+) as f:
+    pickle.dump(ipca, f)
+
+print(ipca.mean_)
+
+# np.savetxt(
+#     "./IncrementalPCA/principal_components_washu120_subsample10_randperm1_train100_zdim"
+#     + str(n_components)
+#     + ".txt",
+#     principal_components,
+#     fmt="%f",
+# )
+
+# obtain the latent representations with the loaded components
+# z = principal_components @ recon_dtseries