cindyhfls committed · Commit 03f24d2 · verified · 1 Parent(s): 4e667d8

Upload run_incrementalpca.py

AlternativeModels/IncrementalPCA/run_incrementalpca.py ADDED
@@ -0,0 +1,97 @@
+# https://stackoverflow.com/questions/31428581/incremental-pca-on-big-data
+import h5py
+import numpy as np
+from sklearn.decomposition import IncrementalPCA
+from scipy.io import loadmat
+import pickle
+
+inverse_transformation_path = "./mask/"
+filestr = "washu120_subsample1_randperm0_timeseries"
+n_samples = 27255  # the number of time points to use; set to None to use all data
+# Construct the path to the left inverse transformation matrix file and load it
+left_file_path = inverse_transformation_path + "Left_fMRI2Grid_192_by_192_NN.mat"
+left_data = loadmat(left_file_path)
+Left_inverse_transformation = left_data["inverse_transformation"]
+
+# Construct the path to the right inverse transformation matrix file and load it
+right_file_path = inverse_transformation_path + "Right_fMRI2Grid_192_by_192_NN.mat"
+right_data = loadmat(right_file_path)
+Right_inverse_transformation = right_data["inverse_transformation"]
+
+# h5filename = '/mnt/leuthardte/Active/Cindy/Data/BCP_Jan2023_10moTo20mogroupavg.h5'
+# h5filename = '/mnt/leuthardte/Active/Cindy/Data/120_allsubs_corr.dconn.h5'
+h5filename = "/mnt/leuthardte/Active/Cindy/Data/" + filestr + ".h5"
+
+# some parameters
+batch_size = 5451  # Adjust based on your system's memory capacity
+
+n_components = 100
+ipca = IncrementalPCA(n_components=n_components)
+
+if not n_samples:
+    # Get the sample size from the HDF5 file
+    with h5py.File(h5filename, "r") as file:
+        n_samples = file["/LeftData"].shape[0]
+        print(n_samples)
+
+# Abort if the sample size is not divisible by the batch size
+print(n_samples)
+assert n_samples % batch_size == 0, "batch size is not a factor of sample size"
+
+for start_idx in range(0, n_samples, batch_size):
+    end_idx = start_idx + batch_size
+
+    print(start_idx)
+
+    # Load one batch of data from the HDF5 file
+    with h5py.File(h5filename, "r") as file:
+        reconL = file["/LeftData"][start_idx:end_idx, :, :, :]
+        reconR = file["/RightData"][start_idx:end_idx, :, :, :]
+
+    # Flatten each batch to (batch_size, n_grid_points) and map it back to the
+    # cortical surface, similar to MATLAB permute and reshape (the (0, 1, 2, 3)
+    # transpose is an identity permutation, kept for parity with the MATLAB code)
+    corticalrecon_L = (
+        Left_inverse_transformation
+        @ reconL.transpose(0, 1, 2, 3).reshape(batch_size, -1).T
+    )
+    corticalrecon_R = (
+        Right_inverse_transformation
+        @ reconR.transpose(0, 1, 2, 3).reshape(batch_size, -1).T
+    )
+
+    # Concatenate the left and right reconstructions: (n_vertices, batch_size)
+    recon_dtseries = np.vstack((corticalrecon_L, corticalrecon_R))
+    # print(recon_dtseries.shape)
+
+    # recon_dtseries[recon_dtseries == 0] = 1  # 2024/11/18: commented this out for the timeseries data
+    # 2024/06/08: make the diagonals equal to 1 instead of 0
+
+    # Update the incremental PCA with this batch (time points as samples)
+    ipca.partial_fit(recon_dtseries.T)
+
+principal_components = ipca.components_
+
+# Save the trained model
+with open(
+    "./IncrementalPCA/pca_model_" + filestr + "_zdim" + str(n_components) + ".pkl",
+    "wb",
+) as f:
+    pickle.dump(ipca, f)
+
+print(ipca.mean_)
+
+# np.savetxt(
+#     "./IncrementalPCA/principal_components_washu120_subsample10_randperm1_train100_zdim"
+#     + str(n_components)
+#     + ".txt",
+#     principal_components,
+#     fmt="%f",
+# )
+
+# Obtain the latent representations with the loaded components
+# z = principal_components @ recon_dtseries