updates
Browse files- README.md +2 -2
- tortoise_tts.ipynb +7 -3
README.md
CHANGED
|
@@ -77,8 +77,8 @@ These reference clips are recordings of a speaker that you provide to guide spee
|
|
| 77 |
This repo comes with several pre-packaged voices. You will be familiar with many of them. :)
|
| 78 |
|
| 79 |
Most of the provided voices were not found in the training set. Experimentally, it seems that voices from the training set
|
| 80 |
-
produce more realistic outputs then those outside of the training set.
|
| 81 |
-
|
| 82 |
|
| 83 |
### Adding a new voice
|
| 84 |
|
|
|
|
| 77 |
This repo comes with several pre-packaged voices. You will be familiar with many of them. :)
|
| 78 |
|
| 79 |
Most of the provided voices were not found in the training set. Experimentally, it seems that voices from the training set
|
| 80 |
+
produce more realistic outputs than those outside of the training set. Any voice prefixed with "train" came from the
|
| 81 |
+
training set.
|
| 82 |
|
| 83 |
### Adding a new voice
|
| 84 |
|
tortoise_tts.ipynb
CHANGED
|
@@ -52,6 +52,8 @@
|
|
| 52 |
"import torch.nn as nn\n",
|
| 53 |
"import torch.nn.functional as F\n",
|
| 54 |
"\n",
|
|
|
|
|
|
|
| 55 |
"from api import TextToSpeech\n",
|
| 56 |
"from utils.audio import load_audio, get_voices\n",
|
| 57 |
"\n",
|
|
@@ -93,7 +95,7 @@
|
|
| 93 |
"Had worn them really about the same,\"\"\"\n",
|
| 94 |
"\n",
|
| 95 |
"# Pick one of the voices from above\n",
|
| 96 |
-
"voice = '
|
| 97 |
"# Pick a \"preset mode\" to determine quality. Options: {\"ultra_fast\", \"fast\" (default), \"standard\", \"high_quality\"}. See docs in api.py\n",
|
| 98 |
"preset = \"fast\""
|
| 99 |
],
|
|
@@ -115,7 +117,8 @@
|
|
| 115 |
" conds.append(c)\n",
|
| 116 |
"\n",
|
| 117 |
"gen = tts.tts_with_preset(text, conds, preset)\n",
|
| 118 |
-
"torchaudio.save('generated.wav', gen.squeeze(0).cpu(), 24000)"
|
|
|
|
| 119 |
],
|
| 120 |
"metadata": {
|
| 121 |
"id": "KEXOKjIvn6NW"
|
|
@@ -139,7 +142,8 @@
|
|
| 139 |
" conds.append(c)\n",
|
| 140 |
"\n",
|
| 141 |
"gen = tts.tts_with_preset(\"They used to say that if man was meant to fly, he’d have wings. But he did fly. He discovered he had to.\", conds, preset)\n",
|
| 142 |
-
"torchaudio.save('captain_kirkard.wav', gen.squeeze(0).cpu(), 24000)"
|
|
|
|
| 143 |
],
|
| 144 |
"metadata": {
|
| 145 |
"id": "fYTk8KUezUr5"
|
|
|
|
| 52 |
"import torch.nn as nn\n",
|
| 53 |
"import torch.nn.functional as F\n",
|
| 54 |
"\n",
|
| 55 |
+
"import IPython\n",
|
| 56 |
+
"\n",
|
| 57 |
"from api import TextToSpeech\n",
|
| 58 |
"from utils.audio import load_audio, get_voices\n",
|
| 59 |
"\n",
|
|
|
|
| 95 |
"Had worn them really about the same,\"\"\"\n",
|
| 96 |
"\n",
|
| 97 |
"# Pick one of the voices from above\n",
|
| 98 |
+
"voice = 'train_dotrice'\n",
|
| 99 |
"# Pick a \"preset mode\" to determine quality. Options: {\"ultra_fast\", \"fast\" (default), \"standard\", \"high_quality\"}. See docs in api.py\n",
|
| 100 |
"preset = \"fast\""
|
| 101 |
],
|
|
|
|
| 117 |
" conds.append(c)\n",
|
| 118 |
"\n",
|
| 119 |
"gen = tts.tts_with_preset(text, conds, preset)\n",
|
| 120 |
+
"torchaudio.save('generated.wav', gen.squeeze(0).cpu(), 24000)\n",
|
| 121 |
+
"IPython.display.Audio('generated.wav')"
|
| 122 |
],
|
| 123 |
"metadata": {
|
| 124 |
"id": "KEXOKjIvn6NW"
|
|
|
|
| 142 |
" conds.append(c)\n",
|
| 143 |
"\n",
|
| 144 |
"gen = tts.tts_with_preset(\"They used to say that if man was meant to fly, he’d have wings. But he did fly. He discovered he had to.\", conds, preset)\n",
|
| 145 |
+
"torchaudio.save('captain_kirkard.wav', gen.squeeze(0).cpu(), 24000)\n",
|
| 146 |
+
"IPython.display.Audio('captain_kirkard.wav')"
|
| 147 |
],
|
| 148 |
"metadata": {
|
| 149 |
"id": "fYTk8KUezUr5"
|