kadirnar committed on
Commit
7d04939
·
verified ·
1 Parent(s): a0ea740

Update audio_tokenizer.py

Browse files
Files changed (1) hide show
  1. audio_tokenizer.py +3 -2
audio_tokenizer.py CHANGED
@@ -157,11 +157,12 @@ def process_dataset(
157
  repo_type="dataset",
158
  revision="main",
159
  max_workers=64,
 
160
  )
161
 
162
  # Load dataset
163
  print("Loading dataset...")
164
- ds = load_dataset(original_dataset, split="train")
165
  ds_sample_rate = ds[0]["audio"]["sampling_rate"]
166
 
167
  # Load SNAC model
@@ -272,5 +273,5 @@ For multispeaker models, ensure your dataset has a "source" field.
272
 
273
  # Upload processed dataset
274
  print(f"Pushing dataset to: {output_dataset}")
275
- ds.push_to_hub(output_dataset)
276
  print("Done!")
 
157
  repo_type="dataset",
158
  revision="main",
159
  max_workers=64,
160
+ token=os.environ.get("HF_TOKEN")
161
  )
162
 
163
  # Load dataset
164
  print("Loading dataset...")
165
+ ds = load_dataset(original_dataset, split="train", token=os.environ.get("HF_TOKEN"))
166
  ds_sample_rate = ds[0]["audio"]["sampling_rate"]
167
 
168
  # Load SNAC model
 
273
 
274
  # Upload processed dataset
275
  print(f"Pushing dataset to: {output_dataset}")
276
+ ds.push_to_hub(output_dataset, token=os.environ.get("HF_TOKEN"))
277
  print("Done!")