hirthickraj2015 committed on
Commit
36d6f84
·
1 Parent(s): 469f979

fixing download

Browse files
Files changed (2) hide show
  1. src/app.py +4 -2
  2. src/dataset_loader.py +43 -12
src/app.py CHANGED
@@ -118,8 +118,10 @@ def load_rag_engine():
118
  st.stop()
119
 
120
  # Ensure dataset files are downloaded from HF Datasets if needed
121
- # First check without UI to see if download is needed
122
- success, files_downloaded = ensure_dataset_files(show_ui=True)
 
 
123
  if not success:
124
  st.error("⚠️ Failed to load dataset files from Hugging Face Datasets.")
125
  st.info("Please check your internet connection and try again.")
 
118
  st.stop()
119
 
120
  # Ensure dataset files are downloaded from HF Datasets if needed
121
+ # Create a container for download progress that will be cleared after completion
122
+ download_container = st.container()
123
+ success, files_downloaded = ensure_dataset_files(progress_container=download_container)
124
+
125
  if not success:
126
  st.error("⚠️ Failed to load dataset files from Hugging Face Datasets.")
127
  st.info("Please check your internet connection and try again.")
src/dataset_loader.py CHANGED
@@ -24,14 +24,14 @@ DATASET_FILES = [
24
  "extraction_progress.json"
25
  ]
26
 
27
- def ensure_dataset_files(dataset_dir: str = "dataset/wikipedia_ireland", show_ui: bool = False) -> tuple:
28
  """
29
  Ensure all dataset files are available locally.
30
  Downloads from HF Datasets if missing.
31
 
32
  Args:
33
  dataset_dir: Local directory for dataset files
34
- show_ui: Whether to show Streamlit UI indicators
35
 
36
  Returns:
37
  Tuple of (success: bool, files_downloaded: bool)
@@ -50,18 +50,30 @@ def ensure_dataset_files(dataset_dir: str = "dataset/wikipedia_ireland", show_ui
50
  return True, False # Success, no files downloaded
51
 
52
  print(f"[INFO] Missing {len(missing_files)} files, downloading from HF Datasets...")
53
- if show_ui:
54
- st.info(f"📥 Downloading {len(missing_files)} missing dataset files from Hugging Face...")
55
 
56
  # Download missing files
57
  import shutil
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  try:
59
  for idx, filename in enumerate(missing_files, 1):
60
  print(f"[INFO] Downloading {filename} ({idx}/{len(missing_files)})...")
61
 
62
- # Only show UI progress if show_ui is True
63
- if show_ui:
64
- st.progress((idx - 1) / len(missing_files), text=f"Downloading {filename}...")
 
65
 
66
  downloaded_path = hf_hub_download(
67
  repo_id=DATASET_REPO,
@@ -73,17 +85,36 @@ def ensure_dataset_files(dataset_dir: str = "dataset/wikipedia_ireland", show_ui
73
  shutil.copy2(downloaded_path, target_path)
74
  print(f"[SUCCESS] Downloaded {filename}")
75
 
76
- if show_ui:
77
- st.progress(1.0, text="All files downloaded!")
78
- st.success("✅ Dataset files ready!")
 
 
 
 
 
 
 
 
 
 
 
79
 
80
  print("[SUCCESS] All dataset files downloaded successfully!")
81
  return True, True # Success, files were downloaded
82
 
83
  except Exception as e:
84
  print(f"[ERROR] Failed to download dataset files: {e}")
85
- if show_ui:
86
- st.error(f"Failed to download dataset files: {e}")
 
 
 
 
 
 
 
 
87
  return False, False # Failure, no files downloaded
88
 
89
 
 
24
  "extraction_progress.json"
25
  ]
26
 
27
+ def ensure_dataset_files(dataset_dir: str = "dataset/wikipedia_ireland", progress_container=None) -> tuple:
28
  """
29
  Ensure all dataset files are available locally.
30
  Downloads from HF Datasets if missing.
31
 
32
  Args:
33
  dataset_dir: Local directory for dataset files
34
+ progress_container: Streamlit container for progress updates (optional)
35
 
36
  Returns:
37
  Tuple of (success: bool, files_downloaded: bool)
 
50
  return True, False # Success, no files downloaded
51
 
52
  print(f"[INFO] Missing {len(missing_files)} files, downloading from HF Datasets...")
 
 
53
 
54
  # Download missing files
55
  import shutil
56
+ import time
57
+
58
+ # Create UI elements if container provided
59
+ status_placeholder = None
60
+ progress_bar_placeholder = None
61
+ progress_text_placeholder = None
62
+
63
+ if progress_container:
64
+ status_placeholder = progress_container.empty()
65
+ progress_text_placeholder = progress_container.empty()
66
+ progress_bar_placeholder = progress_container.empty()
67
+ status_placeholder.info(f"📥 Downloading {len(missing_files)} missing dataset files from Hugging Face...")
68
+
69
  try:
70
  for idx, filename in enumerate(missing_files, 1):
71
  print(f"[INFO] Downloading {filename} ({idx}/{len(missing_files)})...")
72
 
73
+ # Update progress if container provided
74
+ if progress_text_placeholder and progress_bar_placeholder:
75
+ progress_text_placeholder.text(f"⬇️ {filename} ({idx}/{len(missing_files)})")
76
+ progress_bar_placeholder.progress((idx - 1) / len(missing_files))
77
 
78
  downloaded_path = hf_hub_download(
79
  repo_id=DATASET_REPO,
 
85
  shutil.copy2(downloaded_path, target_path)
86
  print(f"[SUCCESS] Downloaded {filename}")
87
 
88
+ # Update to show completion
89
+ if progress_bar_placeholder:
90
+ progress_bar_placeholder.progress(1.0)
91
+
92
+ # Small delay to show completion, then clear
93
+ time.sleep(0.5)
94
+
95
+ # Clear ALL UI elements after successful download
96
+ if status_placeholder:
97
+ status_placeholder.empty()
98
+ if progress_text_placeholder:
99
+ progress_text_placeholder.empty()
100
+ if progress_bar_placeholder:
101
+ progress_bar_placeholder.empty()
102
 
103
  print("[SUCCESS] All dataset files downloaded successfully!")
104
  return True, True # Success, files were downloaded
105
 
106
  except Exception as e:
107
  print(f"[ERROR] Failed to download dataset files: {e}")
108
+ if progress_container:
109
+ # Clear progress indicators
110
+ if status_placeholder:
111
+ status_placeholder.empty()
112
+ if progress_text_placeholder:
113
+ progress_text_placeholder.empty()
114
+ if progress_bar_placeholder:
115
+ progress_bar_placeholder.empty()
116
+ # Show error
117
+ progress_container.error(f"❌ Failed to download dataset files: {e}")
118
  return False, False # Failure, no files downloaded
119
 
120