"""Demo script: run a local DiVA checkpoint on downloaded speech clips.

Downloads two WAV recordings from the SD-QA GitHub repository, loads them as
16 kHz waveforms, and prints whatever ``DiVAModel.generate`` returns for:

1. a single clip with no text prompt,
2. the same clip with a text prompt,
3. a batch of two clips, each paired with its own text prompt.

The checkpoint is read from the current working directory (``"./"``), so run
the script from the directory that contains the model files.
"""
# NOTE(review): AutoModel is not referenced anywhere in this script; the
# import is kept as-is in case it is relied on for its import-time effects.
from transformers import AutoModel  # noqa: F401
import librosa
import wget

from modeling_diva import DiVAModel

# Sample rate every clip is loaded at.
# NOTE(review): presumably the rate the model's audio front end expects --
# confirm against modeling_diva.
SAMPLE_RATE = 16_000

FIRST_CLIP_URL = "https://github.com/ffaisal93/SD-QA/raw/refs/heads/master/dev/eng/irl/wav_eng/-1008642825401516622.wav"
SECOND_CLIP_URL = "https://github.com/ffaisal93/SD-QA/raw/refs/heads/master/dev/eng/irl/wav_eng/-2426554427049983479.wav"


def _load_clip(url):
    """Download the audio file at *url* and return its samples.

    The file is fetched with ``wget.download`` (which returns the local
    filename it saved to) and then read with ``librosa.load`` at
    ``SAMPLE_RATE``. The sample rate that ``librosa.load`` also returns is
    discarded because it was requested explicitly.
    """
    filename = wget.download(url)
    samples, _ = librosa.load(filename, sr=SAMPLE_RATE)
    return samples


speech_data = _load_clip(FIRST_CLIP_URL)

# Load the model from the current directory.
model = DiVAModel.from_pretrained("./")

# Single clip, audio only.
print(model.generate([speech_data]))

# Same clip, with one text prompt passed alongside it.
print(model.generate([speech_data], ["Reply Briefly Like A Pirate"]))

# The second clip is only fetched once the single-clip calls have run,
# matching the original statement order.
speech_data2 = _load_clip(SECOND_CLIP_URL)

# Batched call: two clips, two prompts, given as parallel lists.
print(
    model.generate(
        [speech_data, speech_data2],
        ["Reply Briefly Like A Pirate", "Reply Briefly Like A New Yorker"],
    )
)