Upload 2 files
Browse files- app.py +30 -5
- requirements.txt +1 -1
app.py
CHANGED
|
@@ -15,8 +15,23 @@ Environment variables:
|
|
| 15 |
"""
|
| 16 |
|
| 17 |
import os
|
|
|
|
|
|
|
| 18 |
from pathlib import Path
|
| 19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
import spaces
|
| 21 |
|
| 22 |
# Data source configuration
|
|
@@ -83,6 +98,14 @@ def download_checkpoint(output_dir: Path, model: str) -> None:
|
|
| 83 |
print(" Generation will be disabled without checkpoint")
|
| 84 |
|
| 85 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
def regenerate_umap(data_dir: Path, model: str) -> bool:
|
| 87 |
"""Regenerate UMAP pickle for a model to ensure numba compatibility.
|
| 88 |
|
|
@@ -122,24 +145,26 @@ def regenerate_umap(data_dir: Path, model: str) -> bool:
|
|
| 122 |
with open(json_path, "r") as f:
|
| 123 |
umap_params = json.load(f)
|
| 124 |
|
|
|
|
| 125 |
print(f" Regenerating UMAP for {model}...")
|
| 126 |
-
print(f" Params: n_neighbors={umap_params.get('n_neighbors', 15)}, min_dist={umap_params.get('min_dist', 0.1)}")
|
| 127 |
|
| 128 |
try:
|
| 129 |
# Load activations
|
| 130 |
activations, metadata_df = load_dataset_activations(activation_dir, metadata_path)
|
| 131 |
-
print(f" Loaded {activations.shape[0]} activations")
|
| 132 |
|
| 133 |
-
# Compute UMAP
|
| 134 |
-
embeddings, reducer, scaler = compute_umap(
|
| 135 |
activations,
|
| 136 |
n_neighbors=umap_params.get("n_neighbors", 15),
|
| 137 |
min_dist=umap_params.get("min_dist", 0.1),
|
| 138 |
normalize=True,
|
|
|
|
| 139 |
)
|
| 140 |
|
| 141 |
# Save (overwrites existing pickle with compatible version)
|
| 142 |
-
save_embeddings(embeddings, metadata_df, csv_path, umap_params, reducer, scaler)
|
| 143 |
print(f" UMAP pickle regenerated: {pkl_path}")
|
| 144 |
return True
|
| 145 |
|
|
|
|
| 15 |
"""
|
| 16 |
|
| 17 |
import os
|
| 18 |
+
import subprocess
|
| 19 |
+
import sys
|
| 20 |
from pathlib import Path
|
| 21 |
|
| 22 |
+
# Clone diffviews from git and add the checkout to sys.path (no pip install) to bypass pip cache issues
|
| 23 |
+
_REPO_URL = "https://github.com/mckellcarter/diffviews.git"
|
| 24 |
+
_REPO_BRANCH = os.environ.get("DIFFVIEWS_BRANCH", "feature/modal-transition")
|
| 25 |
+
_REPO_DIR = "/tmp/diffviews"
|
| 26 |
+
|
| 27 |
+
if not os.path.exists(_REPO_DIR):
|
| 28 |
+
print(f"Cloning diffviews from {_REPO_BRANCH}...")
|
| 29 |
+
subprocess.run(
|
| 30 |
+
["git", "clone", "--depth=1", "-b", _REPO_BRANCH, _REPO_URL, _REPO_DIR],
|
| 31 |
+
check=True,
|
| 32 |
+
)
|
| 33 |
+
sys.path.insert(0, _REPO_DIR)
|
| 34 |
+
|
| 35 |
import spaces
|
| 36 |
|
| 37 |
# Data source configuration
|
|
|
|
| 98 |
print(" Generation will be disabled without checkpoint")
|
| 99 |
|
| 100 |
|
| 101 |
+
def get_pca_components() -> int | None:
|
| 102 |
+
"""Read PCA pre-reduction setting from env. None = disabled."""
|
| 103 |
+
val = os.environ.get("DIFFVIEWS_PCA_COMPONENTS", "50")
|
| 104 |
+
if val.lower() in ("0", "none", "off", ""):
|
| 105 |
+
return None
|
| 106 |
+
return int(val)
|
| 107 |
+
|
| 108 |
+
|
| 109 |
def regenerate_umap(data_dir: Path, model: str) -> bool:
|
| 110 |
"""Regenerate UMAP pickle for a model to ensure numba compatibility.
|
| 111 |
|
|
|
|
| 145 |
with open(json_path, "r") as f:
|
| 146 |
umap_params = json.load(f)
|
| 147 |
|
| 148 |
+
pca_components = get_pca_components()
|
| 149 |
print(f" Regenerating UMAP for {model}...")
|
| 150 |
+
print(f" Params: n_neighbors={umap_params.get('n_neighbors', 15)}, min_dist={umap_params.get('min_dist', 0.1)}, pca={pca_components}")
|
| 151 |
|
| 152 |
try:
|
| 153 |
# Load activations
|
| 154 |
activations, metadata_df = load_dataset_activations(activation_dir, metadata_path)
|
| 155 |
+
print(f" Loaded {activations.shape[0]} activations ({activations.shape[1]} dims)")
|
| 156 |
|
| 157 |
+
# Compute UMAP (with optional PCA pre-reduction)
|
| 158 |
+
embeddings, reducer, scaler, pca_reducer = compute_umap(
|
| 159 |
activations,
|
| 160 |
n_neighbors=umap_params.get("n_neighbors", 15),
|
| 161 |
min_dist=umap_params.get("min_dist", 0.1),
|
| 162 |
normalize=True,
|
| 163 |
+
pca_components=pca_components,
|
| 164 |
)
|
| 165 |
|
| 166 |
# Save (overwrites existing pickle with compatible version)
|
| 167 |
+
save_embeddings(embeddings, metadata_df, csv_path, umap_params, reducer, scaler, pca_reducer)
|
| 168 |
print(f" UMAP pickle regenerated: {pkl_path}")
|
| 169 |
return True
|
| 170 |
|
requirements.txt
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
# DiffViews - HuggingFace Spaces Requirements
|
| 2 |
# Requires Python 3.10+
|
| 3 |
-
git
|
| 4 |
|
| 5 |
# Core dependencies
|
| 6 |
torch>=2.0.0
|
|
|
|
| 1 |
# DiffViews - HuggingFace Spaces Requirements
|
| 2 |
# Requires Python 3.10+
|
| 3 |
+
# diffviews itself is not listed here: app.py git-clones it at startup and adds it to sys.path (bypasses pip cache)
|
| 4 |
|
| 5 |
# Core dependencies
|
| 6 |
torch>=2.0.0
|