akhaliq HF Staff committed on
Commit
a970131
·
1 Parent(s): 1a400b9

fetch all files

Browse files
Files changed (1) hide show
  1. app.py +174 -64
app.py CHANGED
@@ -6838,6 +6838,133 @@ Type: Transformers.js Application
6838
 
6839
  return combined
6840
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6841
  def fetch_hf_space_content(username: str, project_name: str) -> str:
6842
  """Fetch content from a Hugging Face Space"""
6843
  try:
@@ -6853,70 +6980,53 @@ def fetch_hf_space_content(username: str, project_name: str) -> str:
6853
  files = fetch_transformers_js_files(api, username, project_name)
6854
  return combine_transformers_js_files(files, username, project_name)
6855
 
6856
- # Try to fetch the main file based on SDK
6857
  sdk = space_info.sdk
6858
- main_file = None
6859
 
6860
- # Define file patterns to try based on SDK
6861
- if sdk == "static":
6862
- file_patterns = ["index.html"]
6863
- elif sdk == "gradio":
6864
- file_patterns = ["app.py", "main.py", "gradio_app.py"]
6865
- elif sdk == "streamlit":
6866
- file_patterns = ["streamlit_app.py", "src/streamlit_app.py", "app.py", "src/app.py", "main.py", "src/main.py", "Home.py", "src/Home.py", "🏠_Home.py", "src/🏠_Home.py", "1_🏠_Home.py", "src/1_🏠_Home.py"]
6867
  else:
6868
- # Try common files for unknown SDKs
6869
- file_patterns = ["app.py", "src/app.py", "index.html", "streamlit_app.py", "src/streamlit_app.py", "main.py", "src/main.py", "Home.py", "src/Home.py"]
6870
-
6871
- # Try to find and download the main file
6872
- for file in file_patterns:
6873
- try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6874
  content = api.hf_hub_download(
6875
  repo_id=f"{username}/{project_name}",
6876
- filename=file,
6877
  repo_type="space"
6878
  )
6879
- main_file = file
6880
- break
6881
- except:
6882
- continue
6883
-
6884
- # If still no main file found, try to list repository files and find Python files
6885
- if not main_file and sdk in ["streamlit", "gradio"]:
6886
- try:
6887
- from huggingface_hub import list_repo_files
6888
- files = list_repo_files(repo_id=f"{username}/{project_name}", repo_type="space")
6889
 
6890
- # Look for Python files that might be the main file (root and src/ directory)
6891
- python_files = [f for f in files if f.endswith('.py') and not f.startswith('.') and
6892
- (('/' not in f) or f.startswith('src/'))]
6893
 
6894
- for py_file in python_files:
6895
- try:
6896
- content = api.hf_hub_download(
6897
- repo_id=f"{username}/{project_name}",
6898
- filename=py_file,
6899
- repo_type="space"
6900
- )
6901
- main_file = py_file
6902
- break
6903
- except:
6904
- continue
6905
- except:
6906
- pass
6907
-
6908
- if main_file:
6909
- content = api.hf_hub_download(
6910
- repo_id=f"{username}/{project_name}",
6911
- filename=main_file,
6912
- repo_type="space"
6913
- )
6914
-
6915
- # Read the file content
6916
- with open(content, 'r', encoding='utf-8') as f:
6917
- file_content = f.read()
6918
-
6919
- return f"""IMPORTED PROJECT FROM HUGGING FACE SPACE
6920
  ==============================================
6921
 
6922
  Space: {username}/{project_name}
@@ -6924,15 +7034,15 @@ SDK: {sdk}
6924
  Main File: {main_file}
6925
 
6926
  {file_content}"""
6927
- else:
6928
- # Try to get more information about available files for debugging
6929
- try:
6930
- from huggingface_hub import list_repo_files
6931
- files = list_repo_files(repo_id=f"{username}/{project_name}", repo_type="space")
6932
- available_files = [f for f in files if not f.startswith('.') and not f.endswith('.md')]
6933
- return f"Error: Could not find main file in space {username}/{project_name}.\n\nSDK: {sdk}\nAvailable files: {', '.join(available_files[:10])}{'...' if len(available_files) > 10 else ''}\n\nTried looking for: {', '.join(file_patterns)}"
6934
- except:
6935
- return f"Error: Could not find main file in space {username}/{project_name}. Expected files for {sdk} SDK: {', '.join(file_patterns) if 'file_patterns' in locals() else 'standard files'}"
6936
 
6937
  except Exception as e:
6938
  return f"Error fetching space content: {str(e)}"
 
6838
 
6839
  return combined
6840
 
6841
def _select_space_files(all_files, sdk, max_files):
    """Return the repo paths worth downloading, most important first."""
    skipped_suffixes = ('.md', '.log', '.pyc')
    kept_txt_files = ('requirements.txt', 'packages.txt')

    # Drop root-level dotfiles, docs, logs, bytecode and stray .txt files.
    relevant = [
        path for path in all_files
        if not (
            path.startswith('.')
            or path.endswith(skipped_suffixes)
            or (path.endswith('.txt') and path not in kept_txt_files)
            or '__pycache__' in path
        )
    ]

    priority_by_sdk = {
        "gradio": ["app.py", "main.py", "gradio_app.py", "requirements.txt", "packages.txt"],
        "streamlit": ["streamlit_app.py", "app.py", "main.py", "requirements.txt", "packages.txt"],
        "static": ["index.html", "index.js", "style.css", "script.js"],
    }

    # Order: SDK priority files, then remaining Python files, then web/config
    # files. Anything else (e.g. requirements.txt for an unlisted SDK) is dropped.
    available = set(relevant)
    ordered = [path for path in priority_by_sdk.get(sdk, ()) if path in available]
    taken = set(ordered)
    ordered += [path for path in relevant if path.endswith('.py') and path not in taken]
    ordered += [
        path for path in relevant
        if path not in taken
        and path.endswith(('.js', '.css', '.json', '.html', '.yml', '.yaml'))
    ]

    return ordered[:max(max_files, 0)]


def _read_space_file(local_path, file_name):
    """Return the UTF-8 text of a downloaded file, or a placeholder if it is not text."""
    try:
        with open(local_path, 'r', encoding='utf-8') as handle:
            return handle.read()
    except UnicodeDecodeError:
        # Bytes that fail UTF-8 decoding once will fail again, so there is no
        # point re-reading and re-decoding; report the size instead.
        import os
        return f"[Binary file: {file_name} - {os.path.getsize(local_path)} bytes]"


def fetch_all_space_files(api, username: str, project_name: str, sdk: str, *, max_files: int = 20) -> dict:
    """Fetch all relevant files from a Hugging Face Space.

    Args:
        api: Client object providing ``hf_hub_download``. If it also provides
            ``list_repo_files`` that method is used for the listing;
            otherwise ``huggingface_hub.list_repo_files`` is used.
        username: Owner of the Space.
        project_name: Name of the Space.
        sdk: Space SDK; "gradio", "streamlit" and "static" each have a list
            of priority files that are fetched first.
        max_files: Upper bound on the number of files downloaded (default 20).

    Returns:
        A dict mapping repo file path to its text content. Files that are not
        valid UTF-8 map to a "[Binary file: ...]" placeholder and files whose
        download fails map to an "[Error loading ...]" placeholder. An empty
        dict is returned when the file listing itself fails, so the caller
        can fall back to single-file loading.
    """
    repo_id = f"{username}/{project_name}"

    try:
        # List through the same client that performs the downloads so both
        # steps share its configuration.
        # NOTE(review): assumes `api` is an HfApi-like client - confirm at the call site.
        list_files = getattr(api, "list_repo_files", None)
        if list_files is None:
            from huggingface_hub import list_repo_files as list_files
        files_to_fetch = _select_space_files(
            list_files(repo_id=repo_id, repo_type="space"), sdk, max_files
        )
    except Exception:
        # Deliberate best-effort: an empty result tells the caller to fall back.
        return {}

    files = {}
    for file_name in files_to_fetch:
        try:
            local_path = api.hf_hub_download(
                repo_id=repo_id,
                filename=file_name,
                repo_type="space"
            )
            files[file_name] = _read_space_file(local_path, file_name)
        except Exception as e:
            # One failing file must not abort the whole import.
            files[file_name] = f"[Error loading {file_name}: {str(e)}]"

    return files
6927
+
6928
def format_multi_file_space(files: dict, username: str, project_name: str, sdk: str) -> str:
    """Format multiple files from a space into a readable format.

    Args:
        files: Mapping of file path to file content (or placeholder text).
        username: Owner of the Space, shown in the header.
        project_name: Name of the Space, shown in the header.
        sdk: Space SDK, shown in the header.

    Returns:
        A header followed by one "=== <path> ===" section per file. Well-known
        entry files come first in a fixed order; the remaining files follow in
        sorted order. Returns "" when ``files`` is empty.
    """
    if not files:
        return ""

    header = (
        "IMPORTED PROJECT FROM HUGGING FACE SPACE\n"
        "==============================================\n"
        "\n"
        f"Space: {username}/{project_name}\n"
        f"SDK: {sdk}\n"
        f"Files: {len(files)} files loaded\n"
        "\n"
    )

    # Main files are shown first, in this fixed order.
    priority_order = ["app.py", "main.py", "streamlit_app.py", "gradio_app.py", "index.html", "requirements.txt"]
    main_files = [name for name in priority_order if name in files]
    main_set = set(main_files)
    other_files = [name for name in sorted(files) if name not in main_set]

    # Build the result in a single join instead of repeated string +=.
    sections = [
        f"=== {name} ===\n{files[name]}\n\n"
        for name in main_files + other_files
    ]
    return header + "".join(sections)
6967
+
6968
  def fetch_hf_space_content(username: str, project_name: str) -> str:
6969
  """Fetch content from a Hugging Face Space"""
6970
  try:
 
6980
  files = fetch_transformers_js_files(api, username, project_name)
6981
  return combine_transformers_js_files(files, username, project_name)
6982
 
6983
+ # Use the new multi-file loading approach for all space types
6984
  sdk = space_info.sdk
6985
+ files = fetch_all_space_files(api, username, project_name, sdk)
6986
 
6987
+ if files:
6988
+ # Use the multi-file format
6989
+ return format_multi_file_space(files, username, project_name, sdk)
 
 
 
 
6990
  else:
6991
+ # Fallback to single file loading for compatibility
6992
+ main_file = None
6993
+
6994
+ # Define file patterns to try based on SDK
6995
+ if sdk == "static":
6996
+ file_patterns = ["index.html"]
6997
+ elif sdk == "gradio":
6998
+ file_patterns = ["app.py", "main.py", "gradio_app.py"]
6999
+ elif sdk == "streamlit":
7000
+ file_patterns = ["streamlit_app.py", "src/streamlit_app.py", "app.py", "src/app.py", "main.py", "src/main.py", "Home.py", "src/Home.py", "🏠_Home.py", "src/🏠_Home.py", "1_🏠_Home.py", "src/1_🏠_Home.py"]
7001
+ else:
7002
+ # Try common files for unknown SDKs
7003
+ file_patterns = ["app.py", "src/app.py", "index.html", "streamlit_app.py", "src/streamlit_app.py", "main.py", "src/main.py", "Home.py", "src/Home.py"]
7004
+
7005
+ # Try to find and download the main file
7006
+ for file in file_patterns:
7007
+ try:
7008
+ content = api.hf_hub_download(
7009
+ repo_id=f"{username}/{project_name}",
7010
+ filename=file,
7011
+ repo_type="space"
7012
+ )
7013
+ main_file = file
7014
+ break
7015
+ except:
7016
+ continue
7017
+
7018
+ if main_file:
7019
  content = api.hf_hub_download(
7020
  repo_id=f"{username}/{project_name}",
7021
+ filename=main_file,
7022
  repo_type="space"
7023
  )
 
 
 
 
 
 
 
 
 
 
7024
 
7025
+ # Read the file content
7026
+ with open(content, 'r', encoding='utf-8') as f:
7027
+ file_content = f.read()
7028
 
7029
+ return f"""IMPORTED PROJECT FROM HUGGING FACE SPACE
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7030
  ==============================================
7031
 
7032
  Space: {username}/{project_name}
 
7034
  Main File: {main_file}
7035
 
7036
  {file_content}"""
7037
+ else:
7038
+ # Try to get more information about available files for debugging
7039
+ try:
7040
+ from huggingface_hub import list_repo_files
7041
+ files_list = list_repo_files(repo_id=f"{username}/{project_name}", repo_type="space")
7042
+ available_files = [f for f in files_list if not f.startswith('.') and not f.endswith('.md')]
7043
+ return f"Error: Could not find main file in space {username}/{project_name}.\n\nSDK: {sdk}\nAvailable files: {', '.join(available_files[:10])}{'...' if len(available_files) > 10 else ''}\n\nTried looking for: {', '.join(file_patterns)}"
7044
+ except:
7045
+ return f"Error: Could not find main file in space {username}/{project_name}. Expected files for {sdk} SDK: {', '.join(file_patterns) if 'file_patterns' in locals() else 'standard files'}"
7046
 
7047
  except Exception as e:
7048
  return f"Error fetching space content: {str(e)}"