Spaces:
Runtime error
Runtime error
| # Copyright 2024 NVIDIA CORPORATION & AFFILIATES | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
| # | |
| # SPDX-License-Identifier: Apache-2.0 | |
| # This file is copied from https://github.com/NVlabs/VILA/tree/main/llava/wids | |
| import fcntl | |
| import os | |
| import shutil | |
| import sys | |
| import time | |
| from collections import deque | |
| from datetime import datetime | |
| from urllib.parse import urlparse | |
| recent_downloads = deque(maxlen=1000) | |
| open_objects = {} | |
| max_open_objects = 100 | |
| class ULockFile: | |
| """A simple locking class. We don't need any of the third | |
| party libraries since we rely on POSIX semantics for linking | |
| below anyway.""" | |
| def __init__(self, path): | |
| self.lockfile_path = path | |
| self.lockfile = None | |
| def __enter__(self): | |
| self.lockfile = open(self.lockfile_path, "w") | |
| fcntl.flock(self.lockfile.fileno(), fcntl.LOCK_EX) | |
| return self | |
| def __exit__(self, exc_type, exc_val, exc_tb): | |
| fcntl.flock(self.lockfile.fileno(), fcntl.LOCK_UN) | |
| self.lockfile.close() | |
| self.lockfile = None | |
| try: | |
| os.unlink(self.lockfile_path) | |
| except FileNotFoundError: | |
| pass | |
| def pipe_download(remote, local): | |
| """Perform a download for a pipe: url.""" | |
| assert remote.startswith("pipe:") | |
| cmd = remote[5:] | |
| cmd = cmd.format(local=local) | |
| assert os.system(cmd) == 0, "Command failed: %s" % cmd | |
| def copy_file(remote, local): | |
| remote = urlparse(remote) | |
| assert remote.scheme in ["file", ""] | |
| # use absolute path | |
| remote = os.path.abspath(remote.path) | |
| local = urlparse(local) | |
| assert local.scheme in ["file", ""] | |
| local = os.path.abspath(local.path) | |
| if remote == local: | |
| return | |
| # check if the local file exists | |
| shutil.copyfile(remote, local) | |
| verbose_cmd = int(os.environ.get("WIDS_VERBOSE_CMD", "0")) | |
| def vcmd(flag, verbose_flag=""): | |
| return verbose_flag if verbose_cmd else flag | |
| default_cmds = { | |
| "posixpath": copy_file, | |
| "file": copy_file, | |
| "pipe": pipe_download, | |
| "http": "curl " + vcmd("-s") + " -L {url} -o {local}", | |
| "https": "curl " + vcmd("-s") + " -L {url} -o {local}", | |
| "ftp": "curl " + vcmd("-s") + " -L {url} -o {local}", | |
| "ftps": "curl " + vcmd("-s") + " -L {url} -o {local}", | |
| "gs": "gsutil " + vcmd("-q") + " cp {url} {local}", | |
| "s3": "aws s3 cp {url} {local}", | |
| } | |
| # TODO(ligeng): change HTTPS download to python requests library | |
| def download_file_no_log(remote, local, handlers=default_cmds): | |
| """Download a file from a remote url to a local path. | |
| The remote url can be a pipe: url, in which case the remainder of | |
| the url is treated as a command template that is executed to perform the download. | |
| """ | |
| if remote.startswith("pipe:"): | |
| schema = "pipe" | |
| else: | |
| schema = urlparse(remote).scheme | |
| if schema is None or schema == "": | |
| schema = "posixpath" | |
| # get the handler | |
| handler = handlers.get(schema) | |
| if handler is None: | |
| raise ValueError("Unknown schema: %s" % schema) | |
| # call the handler | |
| if callable(handler): | |
| handler(remote, local) | |
| else: | |
| assert isinstance(handler, str) | |
| cmd = handler.format(url=remote, local=local) | |
| assert os.system(cmd) == 0, "Command failed: %s" % cmd | |
| return local | |
| def download_file(remote, local, handlers=default_cmds, verbose=False): | |
| start = time.time() | |
| try: | |
| return download_file_no_log(remote, local, handlers=handlers) | |
| finally: | |
| recent_downloads.append((remote, local, time.time(), time.time() - start)) | |
| if verbose: | |
| print( | |
| "downloaded", | |
| remote, | |
| "to", | |
| local, | |
| "in", | |
| time.time() - start, | |
| "seconds", | |
| file=sys.stderr, | |
| ) | |
| def download_and_open(remote, local, mode="rb", handlers=default_cmds, verbose=False): | |
| with ULockFile(local + ".lock"): | |
| if os.path.exists(remote): | |
| # print("enter1", remote, local, mode) | |
| result = open(remote, mode) | |
| else: | |
| # print("enter2", remote, local, mode) | |
| if not os.path.exists(local): | |
| if verbose: | |
| print("downloading", remote, "to", local, file=sys.stderr) | |
| download_file(remote, local, handlers=handlers) | |
| else: | |
| if verbose: | |
| print("using cached", local, file=sys.stderr) | |
| result = open(local, mode) | |
| # input() | |
| if open_objects is not None: | |
| for k, v in list(open_objects.items()): | |
| if v.closed: | |
| del open_objects[k] | |
| if len(open_objects) > max_open_objects: | |
| raise RuntimeError("Too many open objects") | |
| current_time = datetime.now().strftime("%Y%m%d%H%M%S") | |
| key = tuple(str(x) for x in [remote, local, mode, current_time]) | |
| open_objects[key] = result | |
| return result | |