Spaces:
Build error
Build error
import os
from multiprocessing.pool import ThreadPool
from urllib.parse import urlsplit

import requests
def download_file(url, output_dir, timeout=30):
    """Download ``url`` into ``output_dir`` and report the outcome on stdout.

    The local filename is the last component of the URL *path*; the query
    string and fragment are ignored, so ``.../a.wav?token=x`` is saved as
    ``a.wav``. Errors are printed instead of raised so that one bad URL does
    not abort a batch of downloads.

    Args:
        url: URL of the file to fetch.
        output_dir: Directory the file is written into; it must already exist.
        timeout: Seconds ``requests`` waits for the server to connect or to
            send data before giving up. This is not a limit on the total
            download time.

    Returns:
        None.
    """
    filename = urlsplit(url).path.rsplit('/', 1)[-1]
    if not filename:
        # Nothing to name the file after (empty URL, bare host, trailing
        # slash): report it without making a pointless request.
        print(f"Error downloading {url}: cannot derive a filename from the URL")
        return

    filepath = os.path.join(output_dir, filename)
    # Stream into a temporary sibling file and rename it only on success, so
    # an interrupted transfer never leaves a truncated file under the final
    # name.
    partial_path = filepath + '.part'
    try:
        # The context manager releases the connection even when the body is
        # not fully consumed; without a timeout a stalled server would block
        # this worker forever.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            with open(partial_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        os.replace(partial_path, filepath)
        print(f"Downloaded {filename}")
    except Exception as e:
        # Best-effort cleanup of the incomplete temporary file.
        try:
            os.remove(partial_path)
        except OSError:
            pass
        print(f"Error downloading {filename}: {str(e)}")
def download_files(urls, output_dir, num_workers=None):
    """Download every URL in ``urls`` into ``output_dir`` using a thread pool.

    ``output_dir`` is created if it does not exist. Blank entries (for
    example empty lines from a URL list file) are skipped. The call returns
    once all submitted downloads have finished.

    Args:
        urls: Iterable of URL strings.
        output_dir: Directory that receives the downloaded files.
        num_workers: Number of worker threads. When omitted (or falsy) the
            pool is sized from ``os.cpu_count()``.

    Returns:
        None.
    """
    os.makedirs(output_dir, exist_ok=True)

    targets = [url.strip() for url in urls if url and url.strip()]
    if not targets:
        return

    # Leaving the ``with`` block terminates the pool, so the worker threads
    # are released even if submitting a task raises part-way through.
    with ThreadPool(processes=num_workers or os.cpu_count()) as pool:
        for url in targets:
            pool.apply_async(download_file, (url, output_dir))
        # close() + join() must run inside the block: they wait for the
        # queued downloads, whereas the implicit terminate() on exit would
        # stop outstanding work.
        pool.close()
        pool.join()
if __name__ == '__main__':
    # As described, the audio file URLs are stored in a text file named
    # urls.txt.
    urls_file = 'urls.txt'
    # The downloaded audio files are saved in a folder named dataset.
    output_directory = 'dataset'

    # Read the whole list up front, then split it into one URL per line.
    with open(urls_file, 'r') as url_listing:
        raw_listing = url_listing.read()
    urls = raw_listing.splitlines()

    download_files(urls, output_directory)