Spaces:
Build error
Build error
| import argparse | |
| import functools | |
| import os | |
| import pathlib | |
| from multiprocessing.pool import ThreadPool | |
| from tqdm import tqdm | |
| ''' | |
| Helper function for scripts that iterate over large sets of files. Defines command-line arguments | |
| for operating over a large set of files, then handles setting up a worker queue system to operate | |
| on those files. You need to provide your own process_file_fn. | |
| process_file_fn expected signature: | |
| (path, output_path) | |
| ''' | |
def do_to_files(process_file_fn):
    """Run ``process_file_fn`` over every file selected on the command line.

    Parses the following arguments from ``sys.argv``:

        --path         Root directory that is searched recursively (required).
        --glob         Pattern passed to ``pathlib.Path.rglob`` (required).
        --out          Forwarded unchanged to ``process_file_fn`` as the
                       ``output_path`` keyword argument.
        --resume       Number of leading files to skip, as an integer
                       (default 0, i.e. process everything).
        --num_workers  Size of the thread pool, as an integer. A value of 0
                       or less processes the files serially in the calling
                       thread (default 0).

    Args:
        process_file_fn: Callable invoked once per matched file as
            ``process_file_fn(path, output_path=out)``, where ``path`` is the
            file path as a ``str``. Its return value is discarded.
    """
    parser = argparse.ArgumentParser()
    # --path and --glob are mandatory in practice: without them the pathlib
    # calls below fail, so let argparse report the problem as a usage error.
    parser.add_argument('--path', required=True)
    parser.add_argument('--glob', required=True)
    parser.add_argument('--out')
    # type=int is required here: argparse yields strings by default, and a
    # string cannot be used as a slice index (files[resume:] would raise).
    parser.add_argument('--resume', type=int, default=0)
    # Defaulting to 0 selects the serial path instead of crashing on int(None).
    parser.add_argument('--num_workers', type=int, default=0)
    args = parser.parse_args()

    root = pathlib.Path(args.path)
    files = [str(f) for f in root.rglob(args.glob)]
    # NOTE(review): the listing is not sorted, so --resume skips the same files
    # only if the directory enumeration order is unchanged between runs.
    files = files[args.resume:]

    pfn = functools.partial(process_file_fn, output_path=args.out)
    if args.num_workers > 0:
        with ThreadPool(args.num_workers) as pool:
            # list() drains the lazy imap iterator so every file is processed
            # (and any worker exception is re-raised) before the pool closes.
            list(tqdm(pool.imap(pfn, files), total=len(files)))
    else:
        for f in tqdm(files):
            pfn(f)