Pj12 committed on
Commit
02ee4d9
·
verified ·
1 Parent(s): cbfe346

Delete extract_feature_print.py

Browse files
Files changed (1) hide show
  1. extract_feature_print.py +0 -304
extract_feature_print.py DELETED
@@ -1,304 +0,0 @@
1
- import os, sys, traceback
2
- from transformers import HubertModel
3
- import librosa
4
- from torch import nn
5
- import torch
6
-
7
- import json
8
- os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
9
- os.environ["PYTORCH_MPS_HIGH_WATERMARK_RATIO"] = "0.0"
10
-
11
# Positional command-line arguments:
#   <device> <n_part> <i_part> [<i_gpu>] <exp_dir> <version> ...
device = sys.argv[1]
n_part, i_part = int(sys.argv[2]), int(sys.argv[3])
if len(sys.argv) != 6:
    # A GPU index precedes the experiment directory; it is exported through
    # CUDA_VISIBLE_DEVICES before the version argument is read.
    i_gpu = sys.argv[4]
    exp_dir = sys.argv[5]
    os.environ["CUDA_VISIBLE_DEVICES"] = str(i_gpu)
    version = sys.argv[6]
else:
    # Exactly six argv entries: no GPU index was passed.
    exp_dir = sys.argv[4]
    version = sys.argv[5]
22
- import torch
23
- import torch.nn.functional as F
24
- import soundfile as sf
25
- import numpy as np
26
- from fairseq import checkpoint_utils
27
-
28
# An available accelerator overrides the device named on the command line:
# CUDA is checked first, MPS only when CUDA is unavailable.
_cuda_ready = torch.cuda.is_available()
if _cuda_ready or torch.backends.mps.is_available():
    device = "cuda" if _cuda_ready else "mps"

# Config file names; Config joins each of them onto the "configs" directory.
# "40k.json" is listed twice, exactly as in the original list.
version_config_paths = [
    os.path.join("", file_name)
    for file_name in (
        "32k.json",
        "40k.json",
        "48k.json",
        "48k_v2.json",
        "40k.json",
        "32k_v2.json",
        "32k_v3.json",
        "40k_v3.json",
        "48k_v3.json",
    )
]
45
-
46
class Config:
    """Runtime settings: compute device, precision flag and window sizes.

    Constructing an instance reads every JSON file named in
    ``version_config_paths`` and, depending on the detected device, may
    rewrite those files (and a ``preprocess.py`` one directory above this
    module) through :meth:`set_precision`.
    """

    def __init__(self):
        cuda_ok = torch.cuda.is_available()
        self.device = "cuda:0" if cuda_ok else "cpu"
        self.is_half = self.device != "cpu"
        if self.device.startswith("cuda"):
            device_index = int(self.device.split(":")[-1])
            self.gpu_name = torch.cuda.get_device_name(device_index)
        else:
            self.gpu_name = None
        self.json_config = self.load_config_json()
        # Stays None unless set_cuda_config() fills it in below.
        self.gpu_mem = None
        (
            self.x_pad,
            self.x_query,
            self.x_center,
            self.x_max,
        ) = self.device_config()

    def load_config_json(self) -> dict:
        """Return ``{config file name: parsed JSON}`` for every listed file."""
        loaded = {}
        for name in version_config_paths:
            with open(os.path.join("configs", name), "r") as handle:
                loaded[name] = json.load(handle)
        return loaded

    def has_mps(self) -> bool:
        """Report whether the MPS backend is available."""
        return torch.backends.mps.is_available()

    def has_xpu(self) -> bool:
        """Report whether ``torch.xpu`` exists and is available."""
        if not hasattr(torch, "xpu"):
            return False
        return torch.xpu.is_available()

    def set_precision(self, precision):
        """Persist ``precision`` ("fp32" or "fp16") to the config files.

        Sets ``train.fp16_run`` in every listed JSON config that exists and,
        when ``preprocess.py`` is found one directory above this module,
        swaps the literal "3.0"/"3.7" inside it.

        Returns:
            A status message naming the precision that was written.

        Raises:
            ValueError: If ``precision`` is neither "fp32" nor "fp16".
        """
        if precision not in ("fp32", "fp16"):
            raise ValueError("Invalid precision type. Must be 'fp32' or 'fp16'.")

        use_fp16 = precision == "fp16"
        if use_fp16:
            old_token, new_token = "3.0", "3.7"
        else:
            old_token, new_token = "3.7", "3.0"
        preprocess_path = os.path.join(
            os.path.dirname(__file__), os.pardir, "preprocess.py"
        )

        for name in version_config_paths:
            full_config_path = os.path.join("configs", name)
            try:
                with open(full_config_path, "r") as reader:
                    data = json.load(reader)
                data["train"]["fp16_run"] = use_fp16
                with open(full_config_path, "w") as writer:
                    json.dump(data, writer, indent=4)
            except FileNotFoundError:
                # Missing config files are reported and skipped.
                print(f"File not found: {full_config_path}")

        if os.path.exists(preprocess_path):
            with open(preprocess_path, "r") as reader:
                source_text = reader.read()
            with open(preprocess_path, "w") as writer:
                writer.write(source_text.replace(old_token, new_token))

        return f"Overwritten preprocess and config.json to use {precision}."

    def get_precision(self):
        """Return "fp16" or "fp32" as recorded in the first listed config.

        Returns ``None`` (after printing a message) when that file is missing.

        Raises:
            FileNotFoundError: If ``version_config_paths`` is empty.
        """
        if not version_config_paths:
            raise FileNotFoundError("No configuration paths provided.")

        full_config_path = os.path.join("configs", version_config_paths[0])
        try:
            with open(full_config_path, "r") as reader:
                data = json.load(reader)
        except FileNotFoundError:
            print(f"File not found: {full_config_path}")
            return None
        return "fp16" if data["train"].get("fp16_run", False) else "fp32"

    def device_config(self) -> tuple:
        """Finalize device/precision and return ``(x_pad, x_query, x_center, x_max)``."""
        if self.device.startswith("cuda"):
            self.set_cuda_config()
        else:
            # Without CUDA, fall back to MPS when present, otherwise CPU;
            # both run in full precision.
            self.device = "mps" if self.has_mps() else "cpu"
            self.is_half = False
            self.set_precision("fp32")

        # Default sizes depend on whether half precision is in use.
        sizes = (3, 10, 60, 65) if self.is_half else (1, 6, 38, 41)
        if self.gpu_mem is not None and self.gpu_mem <= 4:
            # Smaller sizes when the reported GPU memory is at most 4 GiB.
            sizes = (1, 5, 30, 32)
        return sizes

    def set_cuda_config(self):
        """Record the CUDA device name and memory; force fp32 on listed GPUs."""
        index = int(self.device.split(":")[-1])
        name = torch.cuda.get_device_name(index)
        self.gpu_name = name

        # Substrings of device names that are switched to full precision,
        # unless the name also contains "V100".
        fp32_markers = ("16", "P40", "P10", "1060", "1070", "1080")
        matches_marker = any(marker in name for marker in fp32_markers)
        if matches_marker and "V100" not in name.upper():
            self.is_half = False
            self.set_precision("fp32")

        # Whole GiB, rounded down.
        total_bytes = torch.cuda.get_device_properties(index).total_memory
        self.gpu_mem = total_bytes // (1024**3)
161
# Module-level settings object. Constructing it runs Config.device_config(),
# which may rewrite the JSON files under "configs/" through set_precision().
- config = Config()
162
-
163
def load_audio(file, sample_rate):
    """Read an audio file and return its samples as a flat array.

    Args:
        file: Path to the audio file. Surrounding spaces, double quotes and
            newlines are stripped before the file is opened.
        sample_rate: Target sampling rate; the audio is resampled only when
            the file's own rate differs from it.

    Returns:
        ``audio.flatten()`` of the decoded samples, after the optional
        down-mix to mono and the optional resampling.

    Raises:
        RuntimeError: If reading, down-mixing or resampling fails. The
            underlying exception is attached as ``__cause__``.
    """
    try:
        file = file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
        audio, sr = sf.read(file)
        if len(audio.shape) > 1:
            # Multi-dimensional data is transposed, then down-mixed to mono.
            audio = librosa.to_mono(audio.T)
        if sr != sample_rate:
            audio = librosa.resample(audio, orig_sr=sr, target_sr=sample_rate)
    except Exception as error:
        # Chain explicitly so the root cause is reported as the direct cause
        # of the RuntimeError instead of an incidental context.
        raise RuntimeError(
            f"An error occurred loading the audio: {error}"
        ) from error

    return audio.flatten()
175
-
176
- #HuggingFacePlaceHolder = None
177
class HubertModelWithFinalProj(HubertModel):
    """``HubertModel`` with an extra linear ``final_proj`` layer.

    The layer maps ``config.hidden_size`` features to
    ``config.classifier_proj_size`` features; both sizes are printed when the
    model is constructed.
    """

    def __init__(self, config):
        super().__init__(config)
        in_features = config.hidden_size
        out_features = config.classifier_proj_size
        self.final_proj = nn.Linear(in_features, out_features)
        print(in_features, out_features)
182
-
183
# Append-mode log file inside the experiment directory; printt() writes to it.
_log_path = "%s/extract_f0_feature.log" % exp_dir
f = open(_log_path, mode="a+")
184
-
185
-
186
def printt(strr):
    """Print ``strr`` and append it, newline-terminated, to the log file ``f``.

    Args:
        strr: Any object; it is rendered the same way ``str()`` renders it.
    """
    print(strr)
    # An f-string is used instead of ``"%s\n" % strr`` because the ``%``
    # operator unpacks a tuple argument as the list of format arguments,
    # which misprints one-element tuples and raises TypeError for longer ones.
    f.write(f"{strr}\n")
    # Flush after every message so the log file is always up to date.
    f.flush()
190
-
191
-
192
printt(sys.argv)
model_path = sys.argv[7]
Custom_Embed = False
sample_embedding = sys.argv[8]

# When the model path ends in "Custom", the embedder name picks the checkpoint
# file to load and switches the script to the custom-embedder code path.
_custom_checkpoints = {
    "hubert_base": "hubert_base.pt",
    "contentvec_base": "contentvec_base.pt",
    "hubert_base_japanese": "japanese_hubert_base.pt",
    "hubert_large_ll60k": "hubert_large_ll60k.pt",
}
_is_custom_dir = os.path.split(model_path)[-1] == "Custom"
if _is_custom_dir and sample_embedding in _custom_checkpoints:
    model_path = _custom_checkpoints[sample_embedding]
    Custom_Embed = True

printt(exp_dir)
wavPath = "%s/1_16k_wavs" % exp_dir
# The output folder name depends on the version and the chosen embedder.
if version == "v1":
    outPath = "%s/3_feature256" % exp_dir
elif version == "v2" and sample_embedding != "hubert_large_ll60k":
    outPath = "%s/3_feature768" % exp_dir
else:
    outPath = "%s/3_feature1024" % exp_dir
os.makedirs(outPath, exist_ok=True)
215
-
216
-
217
# wave must be 16k, hop_size=320
def readwave(wav_path, normalize=False):
    """Load a 16 kHz wav file as a tensor reshaped to ``(1, -1)``.

    Args:
        wav_path: Path of the wav file; its sample rate must be 16000.
        normalize: When true, ``F.layer_norm`` is applied over the whole
            signal.

    Returns:
        The samples as a tensor viewed as ``(1, -1)``.
    """
    wav, sr = sf.read(wav_path)
    assert sr == 16000
    if Custom_Embed:
        # Custom embedders read the file again through load_audio() and use
        # the module-level dtype and device.
        feats = torch.from_numpy(load_audio(wav_path, sr)).to(dtype).to(device)
    else:
        feats = torch.from_numpy(wav).float()
    if feats.dim() == 2:
        # Two channels: average over the last dimension.
        feats = feats.mean(-1)
    assert feats.dim() == 1, feats.dim()
    if normalize:
        with torch.no_grad():
            feats = F.layer_norm(feats, feats.shape)
    return feats.view(1, -1)
233
-
234
-
235
# HuBERT model
printt("load model(s) from {}".format(model_path))
# Stop when the checkpoint file does not exist.
if not os.access(model_path, os.F_OK):
    printt(
        "Error: Extracting is shut down because %s does not exist, you may download it from https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main"
        % model_path
    )
    exit(0)
models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task([model_path])
if Custom_Embed:
    # Every custom embedder is loaded the same way, from the "Custom/" folder.
    dtype = torch.float16 if config.is_half and "cuda" in device else torch.float32
    model = HubertModelWithFinalProj.from_pretrained("Custom/").to(dtype).to(device)
else:
    model = models[0]
    # Half precision on every device other than mps and cpu.
    if device not in ["mps", "cpu"]:
        model = model.half()
model = model.to(device)
printt("move model to %s" % device)
model.eval()
258
-
259
# This process handles every n_part-th file of the sorted listing, starting
# at offset i_part.
todo = sorted(os.listdir(wavPath))[i_part::n_part]
# Progress is logged every n files (about ten times over the whole run).
n = max(1, len(todo) // 10)
if not todo:
    printt("no-feature-todo")
else:
    printt("all-feature-%s" % len(todo))
    for idx, file in enumerate(todo):
        try:
            if file.endswith(".wav"):
                wav_path = "%s/%s" % (wavPath, file)
                # Swap only the extension. str.replace("wav", "npy") would
                # also rewrite "wav" inside the stem ("wave_1.wav" would
                # become "npye_1.npy").
                out_path = "%s/%s" % (outPath, os.path.splitext(file)[0] + ".npy")

                # Features already on disk are not recomputed.
                if os.path.exists(out_path):
                    continue

                feats = readwave(wav_path, normalize=saved_cfg.task.normalize)
                padding_mask = torch.BoolTensor(feats.shape).fill_(False)
                inputs = {
                    "source": feats.half().to(device)
                    if device not in ["mps", "cpu"]
                    else feats.to(device),
                    "padding_mask": padding_mask.to(device),
                    # Layer 9 for v1; otherwise 12, or 24 for hubert_large_ll60k.
                    "output_layer": 9
                    if version == "v1"
                    else 12
                    if sample_embedding != "hubert_large_ll60k"
                    else 24,
                }
                with torch.no_grad():
                    if Custom_Embed:
                        feats = model(feats)["last_hidden_state"]
                        feats = (
                            model.final_proj(feats[0]).unsqueeze(0)
                            if version == "v1"
                            else feats
                        )
                    else:
                        logits = model.extract_features(**inputs)
                        feats = (
                            model.final_proj(logits[0])
                            if version == "v1"
                            else logits[0]
                        )

                feats = feats.squeeze(0).float().cpu().numpy()
                # Features containing NaN are reported instead of saved.
                if np.isnan(feats).sum() == 0:
                    np.save(out_path, feats, allow_pickle=False)
                else:
                    printt("%s-contains nan" % file)
                if idx % n == 0:
                    printt("now-%s,all-%s,%s,%s" % (idx, len(todo), file, feats.shape))
        except Exception:
            # Log the failure and continue with the next file. Catching
            # Exception rather than using a bare except lets KeyboardInterrupt
            # and SystemExit stop the run.
            printt(traceback.format_exc())
    printt("all-feature-done")