File size: 17,786 Bytes
fe42caa
523ac73
2d0a69d
18c5dec
 
523ac73
 
52ce752
 
18c5dec
fe42caa
 
a2ea4b0
 
fe42caa
2d0a69d
52ce752
 
 
 
 
fe42caa
52ce752
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
af3a8dc
a2ea4b0
4e2684d
 
a2ea4b0
18c5dec
2d0a69d
351f890
2d0a69d
 
18c5dec
2d0a69d
fe42caa
 
18c5dec
 
2d0a69d
f8f509f
2d0a69d
 
18c5dec
2d0a69d
fe42caa
18c5dec
2d0a69d
f8f509f
2d0a69d
18c5dec
2d0a69d
fe42caa
2414030
2d0a69d
18c5dec
2d0a69d
 
fe42caa
 
a2ea4b0
 
18c5dec
4e2684d
2d0a69d
fe42caa
18c5dec
2d0a69d
 
 
 
2414030
2d0a69d
 
2414030
 
 
2d0a69d
 
 
 
2414030
2d0a69d
 
4e2684d
2d0a69d
c66335f
18c5dec
 
 
 
 
2d0a69d
fe42caa
 
18c5dec
af3a8dc
8959513
fe42caa
18c5dec
2d0a69d
fe42caa
1b61efc
f8f509f
 
 
 
 
 
 
78a48da
97612c8
f8f509f
5f477f6
f8f509f
 
78a48da
f8f509f
4e2684d
18c5dec
4e2684d
52ce752
18c5dec
52ce752
fe42caa
 
18c5dec
 
4e2684d
 
2d0a69d
52ce752
4e2684d
52ce752
 
 
 
 
 
 
 
 
 
 
 
18c5dec
52ce752
 
fe42caa
4e2684d
2d0a69d
 
fe42caa
 
af3a8dc
2d0a69d
4e2684d
2d0a69d
 
6fb9d53
4e2684d
18c5dec
6fb9d53
4e2684d
 
52ce752
 
 
 
 
 
 
 
 
4e2684d
52ce752
 
 
af3a8dc
52ce752
fe42caa
 
52ce752
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2d0a69d
52ce752
 
18c5dec
523ac73
 
4e2684d
2d0a69d
4e2684d
2414030
4e2684d
2414030
523ac73
 
 
2414030
4e2684d
2414030
523ac73
2414030
 
2d0a69d
 
523ac73
2414030
 
2d0a69d
523ac73
2414030
523ac73
 
 
 
18c5dec
a2ea4b0
18c5dec
 
4e2684d
 
b49016e
fe42caa
523ac73
a7c17e4
2d0a69d
 
fe42caa
523ac73
 
2414030
4e2684d
523ac73
2d0a69d
 
4e2684d
2414030
4e2684d
 
523ac73
2d0a69d
 
 
f8f509f
 
523ac73
2d0a69d
d7e325f
2d0a69d
52ce752
2d0a69d
523ac73
2414030
 
 
 
 
 
523ac73
4e2684d
2414030
2d0a69d
2414030
523ac73
2414030
2d0a69d
4e2684d
523ac73
2414030
 
 
 
 
523ac73
2414030
2d0a69d
f8f509f
4e2684d
b49016e
4e2684d
2d0a69d
 
52ce752
 
f8f509f
1b61efc
f8f509f
1a9c9d5
 
f8f509f
b49016e
2d0a69d
 
4e2684d
2d0a69d
 
52ce752
 
4e2684d
2d0a69d
52ce752
 
 
 
2d0a69d
 
 
 
fe42caa
 
af3a8dc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
import gradio as gr
from huggingface_hub import HfApi, snapshot_download
from huggingface_hub.utils import HfHubHTTPError
import os
import uuid
import shutil
import tempfile
import pandas as pd
import io
# --- State Management and API Client ---

def get_hf_api(token):
    """Create an ``HfApi`` client for *token*.

    Any falsy token (``None``, empty string) is passed on as ``None`` so the
    client is built without an explicit token.
    """
    effective_token = token or None
    return HfApi(token=effective_token)

# --- Core Logic Functions ---
def is_tabular(filepath):
    """Tell whether *filepath* ends in one of the tabular extensions.

    The comparison is case-insensitive; a missing/empty path is never tabular.
    """
    if not filepath:
        return False
    _, suffix = os.path.splitext(filepath)
    return suffix.lower() in ('.csv', '.tsv', '.json', '.jsonl', '.parquet')

def load_tabular_content(token, repo_id, repo_type, filepath):
    """Download a repository file and parse it into a pandas DataFrame.

    Args:
        token: Hugging Face token (may be empty/None).
        repo_id: Repository identifier.
        repo_type: Repository type passed through to the hub client.
        filepath: Path of the file inside the repository; its extension
            selects the parser.

    Returns:
        The parsed DataFrame, ``None`` when the extension is not one of the
        handled ones, or a one-column ``"Error"`` DataFrame holding the
        exception text when the download or parsing fails.
    """
    try:
        api = get_hf_api(token)
        local_path = api.hf_hub_download(repo_id=repo_id, repo_type=repo_type, filename=filepath, token=token)
        ext = os.path.splitext(filepath)[1].lower()

        if ext == '.csv':
            return pd.read_csv(local_path)
        if ext == '.tsv':
            return pd.read_csv(local_path, sep='\t')
        if ext == '.parquet':
            return pd.read_parquet(local_path)
        if ext == '.json':
            try:
                return pd.read_json(local_path)
            except ValueError:
                # A ".json" file may actually be JSON Lines; retry in that
                # mode. Only parse failures are caught here so that
                # KeyboardInterrupt/SystemExit are no longer swallowed.
                return pd.read_json(local_path, lines=True)
        if ext == '.jsonl':
            return pd.read_json(local_path, lines=True)
        return None
    except Exception as e:
        # Surface the failure inside the table widget instead of crashing.
        return pd.DataFrame({"Error": [str(e)]})
def handle_token_change(token):
    """React to a change of the token textbox.

    Validates the token with ``whoami`` and returns one value per output
    wired to this handler, in this order: token state, "manage files"
    button, "archive" button, "delete" button, "commit" button, author
    textbox (manage tab), author textbox (download tab), whoami JSON panel.

    Every branch returns exactly eight values; the write-action buttons are
    only interactive when the token was accepted.

    Args:
        token: Text currently in the token textbox (may be empty).

    Returns:
        An 8-tuple as described above.
    """

    def build_outputs(state_value, can_write, author_update, whoami_update):
        # Single place that fixes the output order, so all branches agree.
        return (
            state_value,
            gr.update(interactive=can_write),  # manage files button
            gr.update(interactive=can_write),  # archive button
            gr.update(interactive=can_write),  # delete button
            gr.update(interactive=can_write),  # commit button
            author_update,                     # author textbox (manage tab)
            author_update,                     # author textbox (download tab)
            whoami_update,
        )

    if not token:
        # No token: every write action (including archive) is disabled.
        return build_outputs(None, False, gr.update(value=""),
                             gr.update(value=None, visible=False))
    try:
        api = get_hf_api(token)
        user_info = api.whoami()
        username = user_info.get('name')
        return build_outputs(token, True, gr.update(value=username),
                             gr.update(value=user_info, visible=True))
    except HfHubHTTPError as e:
        gr.Warning(f"Invalid Token: {e}. You can only perform read-only actions.")
        # The rejected token is not stored, so later read-only calls run
        # without a token instead of re-sending a credential that was just
        # refused. The author fields are left untouched (empty update).
        return build_outputs(None, False, gr.update(),
                             gr.update(value=None, visible=False))

def list_repos_backend(token, author, repo_type):
    """Fetch the repository ids owned by *author* for one repository type.

    Args:
        token: Hugging Face token (may be empty/None).
        author: User or organization name; when empty the user is asked to
            provide one and an empty list is returned.
        repo_type: Singular type name; the client method
            ``list_<repo_type>s`` is looked up and called.

    Returns:
        A list of repository ids, empty when *author* is missing or the hub
        request fails.
    """
    if not author:
        gr.Info("Please enter an author (username or organization).")
        return []
    try:
        api = get_hf_api(token)
        list_fn = getattr(api, f"list_{repo_type}s")
        repo_ids = [repo.id for repo in list_fn(author=author)]
    except HfHubHTTPError as e:
        # gr.Error is an exception class: merely constructing it shows
        # nothing. A warning toast reports the failure while still letting
        # the caller receive the empty fallback list.
        gr.Warning(f"Could not list repositories: {e}")
        return []
    gr.Info(f"Found {len(repo_ids)} {repo_type}s for '{author}'.")
    return repo_ids

def list_repos_for_management(token, author, repo_type):
    """Refresh the Manage tab after one of its "List ..." buttons is clicked.

    Returns the repo type, a dropdown update carrying the fetched ids with
    the selection cleared, and two updates that hide a component each.
    """
    available_ids = list_repos_backend(token, author, repo_type)
    dropdown_update = gr.update(choices=available_ids, value=None)
    first_hidden = gr.update(visible=False)
    second_hidden = gr.update(visible=False)
    return repo_type, dropdown_update, first_hidden, second_hidden

def list_repos_for_download(token, author, repo_type):
    """Refresh the download dropdown after a "List ..." button is clicked.

    Returns the repo type together with a dropdown update that carries the
    fetched ids and clears the current selection.
    """
    available_ids = list_repos_backend(token, author, repo_type)
    dropdown_update = gr.update(choices=available_ids, value=None)
    return repo_type, dropdown_update

def on_manage_repo_select(repo_id):
    """Return a visibility update (visible iff a repo is chosen) and the repo id itself."""
    has_selection = bool(repo_id)
    panel_update = gr.update(visible=has_selection)
    return panel_update, repo_id

def delete_repo(token, repo_id, repo_type):
    """Delete the selected repository from the Hub.

    Args:
        token: Hugging Face token; required.
        repo_id: Identifier of the repository to delete.
        repo_type: Repository type passed through to the hub client.

    Returns:
        A 3-tuple: the new dropdown value (``None`` after a successful
        delete, otherwise the unchanged *repo_id*) followed by two
        visibility updates. On success both are hidden; on failure the first
        is shown and the second hidden.
    """
    # Result used by every path that did not delete anything.
    unchanged = (repo_id, gr.update(visible=True), gr.update(visible=False))

    # gr.Error is an exception class and shows nothing unless raised, so the
    # problems below are reported through warning toasts, which keeps the
    # return values flowing to the UI.
    if not token:
        gr.Warning("A write-enabled Hugging Face token is required to delete a repository.")
        return unchanged
    if not repo_id:
        gr.Warning("No repository selected.")
        return unchanged
    try:
        api = get_hf_api(token)
        api.delete_repo(repo_id=repo_id, repo_type=repo_type)
    except HfHubHTTPError as e:
        gr.Warning(f"Failed to delete repository: {e}")
        return unchanged
    gr.Info(f"Successfully deleted '{repo_id}'.")
    return None, gr.update(visible=False), gr.update(visible=False)

def archive_repo(token, repo_id, archive_repo, manage_repo_type_state):
    """Move the selected repository under an archive namespace.

    The target id is ``<archive_repo>/<repository name>``, where the
    repository name is the last ``/``-separated part of *repo_id*.

    Args:
        token: Hugging Face token; required.
        repo_id: Identifier of the repository to move.
        archive_repo: Destination namespace (user or organization name).
        manage_repo_type_state: Repository type passed to the hub client.

    Returns:
        A 3-tuple: the new dropdown value (``None`` after a successful move,
        otherwise the unchanged *repo_id*) followed by two visibility
        updates. On success both are hidden; on failure the first is shown
        and the second hidden.
    """
    # Result used by every path that did not move anything.
    unchanged = (repo_id, gr.update(visible=True), gr.update(visible=False))

    # gr.Error is an exception class and shows nothing unless raised, so the
    # problems below are reported through warning toasts instead.
    if not token:
        gr.Warning("A write-enabled Hugging Face token is required to archive a repository.")
        return unchanged
    if not repo_id:
        gr.Warning("No repository selected.")
        return unchanged
    namespace = (archive_repo or "").strip().strip("/")
    if not namespace:
        # Without a namespace the target would be the invalid id "/<name>".
        gr.Warning("Please enter the archive namespace to move the repository to.")
        return unchanged
    try:
        api = get_hf_api(token)
        # [-1] rather than [1]: also works for ids without a namespace part.
        archive_space = f'{namespace}/{repo_id.split("/")[-1]}'
        print(f'moving from {repo_id} to {archive_space}')
        api.move_repo(from_id=repo_id, to_id=archive_space, repo_type=manage_repo_type_state)
    except HfHubHTTPError as e:
        gr.Warning(f"Failed to archive repository: {e}")
        print(e)
        return unchanged
    gr.Info(f"Successfully moved to '{archive_space}'.")
    return None, gr.update(visible=False), gr.update(visible=False)
# --- File Management Functions ---

def show_files_and_load_first(token, repo_id, repo_type):
    """List a repository's files and pre-load the first one.

    Files whose path starts with ``.`` are left out of the list. The first
    remaining file is loaded as a DataFrame when it is tabular, otherwise as
    text for the code editor.

    Args:
        token: Hugging Face token (may be empty/None).
        repo_id: Identifier of the repository to inspect.
        repo_type: Repository type passed through to the hub client.

    Returns:
        Four updates, in order: editor panel, file selector, code editor,
        dataframe editor.
    """
    if not repo_id:
        return gr.update(visible=False), gr.update(choices=[], value=None), gr.update(visible=False), gr.update(visible=False)
    try:
        api = get_hf_api(token)
        repo_files = api.list_repo_files(repo_id=repo_id, repo_type=repo_type)
        filtered_files = [f for f in repo_files if not f.startswith('.')]

        if not filtered_files:
            return (gr.update(visible=True), gr.update(choices=[], value=None),
                    gr.update(value="## Empty Repo", visible=True), gr.update(visible=False))

        first_file = filtered_files[0]
        panel_update = gr.update(visible=True)
        selector_update = gr.update(choices=filtered_files, value=first_file)

        # Pick the widget that matches the file type and hide the other one.
        if is_tabular(first_file):
            df = load_tabular_content(token, repo_id, repo_type, first_file)
            return (panel_update, selector_update,
                    gr.update(visible=False), gr.update(value=df, visible=True))

        content, lang = load_file_content_backend(token, repo_id, repo_type, first_file)
        return (panel_update, selector_update,
                gr.update(value=content, language=lang, visible=True), gr.update(visible=False))

    except Exception as e:
        # gr.Error is an exception class and shows nothing unless raised; a
        # warning toast reports the problem while the fallback updates below
        # are still returned.
        gr.Warning(f"Error: {e}")
        return gr.update(visible=False), gr.update(choices=[], value=None), gr.update(visible=True), gr.update(visible=False)

def load_file_content_backend(token, repo_id, repo_type, filepath):
    """Download a repository file and return ``(text, language)``.

    With no *filepath* a markdown placeholder is returned. The language is
    looked up from the file extension and falls back to ``'python'``; any
    failure is reported as an error string paired with ``'python'``.
    """
    if not filepath:
        return "## Select a file to view.", 'markdown'
    try:
        client = get_hf_api(token)
        cached_file = client.hf_hub_download(repo_id=repo_id, repo_type=repo_type, filename=filepath, token=token)
        with open(cached_file, 'r', encoding='utf-8') as handle:
            text = handle.read()

        _, dotted_suffix = os.path.splitext(filepath)
        suffix = dotted_suffix.lstrip('.').lower()
        known_languages = {'py': 'python', 'js': 'javascript', 'md': 'markdown'}
        return text, known_languages.get(suffix, 'python')
    except Exception as err:
        return f"Error loading file: {err}", 'python'

def load_file_content_for_editor(token, repo_id, repo_type, filepath):
    """Load *filepath* and return ``(code editor update, dataframe update)``.

    Exactly one of the two updates is visible: the dataframe one for tabular
    files, the code one for everything else.
    """
    if not is_tabular(filepath):
        # Text file: fill and show the first update, hide the second.
        text, lang = load_file_content_backend(token, repo_id, repo_type, filepath)
        code_update = gr.update(value=text, language=lang, visible=True)
        return code_update, gr.update(visible=False)

    # Tabular file: hide the first update, fill and show the second.
    frame = load_tabular_content(token, repo_id, repo_type, filepath)
    return gr.update(visible=False), gr.update(value=frame, visible=True)

def commit_file(token, repo_id, repo_type, filepath, code_content, df_content, commit_message):
    """Upload the edited file back to the repository.

    Tabular files are serialized from *df_content* in the format matching
    their extension; every other file is uploaded as the UTF-8 encoding of
    *code_content*.

    Args:
        token: Hugging Face token; required.
        repo_id: Target repository identifier.
        repo_type: Repository type passed through to the hub client.
        filepath: Path of the file inside the repository.
        code_content: Text from the code editor (used for non-tabular files).
        df_content: DataFrame from the dataframe editor (tabular files).
        commit_message: Commit message; when blank, ``"Update <filepath>"``
            is used so the commit is not rejected for an empty message.

    Raises:
        gr.Error: When no token is set or the upload fails. This handler has
            no outputs, so raising is how the error reaches the user;
            constructing ``gr.Error`` without raising shows nothing.
    """
    if not token:
        raise gr.Error("Write-token required.")
    if not filepath:
        gr.Warning("No file selected.")
        return

    try:
        api = get_hf_api(token)

        if is_tabular(filepath):
            # Serialize the DataFrame in the file's own format.
            buffer = io.BytesIO()
            ext = os.path.splitext(filepath)[1].lower()
            if ext == '.csv':
                df_content.to_csv(buffer, index=False)
            elif ext == '.tsv':
                df_content.to_csv(buffer, sep='\t', index=False)
            elif ext == '.parquet':
                df_content.to_parquet(buffer, index=False)
            elif ext == '.json':
                df_content.to_json(buffer, orient='records')
            elif ext == '.jsonl':
                df_content.to_json(buffer, orient='records', lines=True)

            data_to_upload = buffer.getvalue()
        else:
            if code_content is None:
                # Nothing loaded in the editor: refuse rather than fail with
                # an opaque TypeError from bytes(None, ...).
                raise ValueError("no file content to commit")
            data_to_upload = bytes(code_content, 'utf-8')

        # The textbox yields "" when left blank; fall back to a default.
        message = (commit_message or "").strip() or f"Update {filepath}"

        api.upload_file(path_or_fileobj=data_to_upload, path_in_repo=filepath,
                        repo_id=repo_id, repo_type=repo_type, commit_message=message)
    except Exception as e:
        raise gr.Error(f"Failed to commit: {e}") from e
    gr.Info(f"Successfully committed '{filepath}'!")

# --- Download Tab Functions ---

def download_repos_as_zip(token, selected_repo_ids, repo_type, progress=gr.Progress()):
    """Download the selected repositories and bundle them into one ZIP file.

    Each repository is downloaded into its own folder (``/`` in the id
    replaced by ``__``) inside a temporary directory, which is zipped and
    then removed. A repository that fails to download is reported and
    skipped.

    Args:
        token: Hugging Face token (may be empty/None).
        selected_repo_ids: Repository ids chosen in the dropdown.
        repo_type: Repository type of the listed repositories.
        progress: Gradio progress tracker.

    Returns:
        An update for the file component: the ZIP path (visible) on success,
        or a cleared, hidden component when nothing was selected, no type is
        known, or every download failed.
    """
    if not selected_repo_ids:
        gr.Warning("No repositories selected for download.")
        return gr.update(value=None, visible=False)
    if not repo_type:
        gr.Warning("Please list a repository type (Spaces, etc.) before downloading.")
        return gr.update(value=None, visible=False)

    download_root_dir = tempfile.mkdtemp()
    try:
        total_repos = len(selected_repo_ids)
        failed_count = 0
        for i, repo_id in enumerate(selected_repo_ids):
            progress((i) / total_repos, desc=f"Downloading {repo_id} ({i+1}/{total_repos})")
            try:
                folder_name = repo_id.replace("/", "__")
                snapshot_download(repo_id=repo_id, repo_type=repo_type, local_dir=os.path.join(download_root_dir, folder_name),
                                  token=token, local_dir_use_symlinks=False, resume_download=True)
            except Exception as e:
                failed_count += 1
                # gr.Error is an exception class and shows nothing unless
                # raised; raising would abort the remaining downloads, so the
                # failure is reported as a warning and the loop continues.
                gr.Warning(f"Failed to download {repo_id}: {e}")

        if failed_count == total_repos:
            # Do not announce success or hand out an archive when nothing
            # could be downloaded.
            gr.Warning("None of the selected repositories could be downloaded.")
            return gr.update(value=None, visible=False)

        progress(0.95, desc="All items downloaded. Creating ZIP file...")
        zip_base_name = os.path.join(tempfile.gettempdir(), f"hf_{repo_type}s_archive_{uuid.uuid4().hex}")
        zip_path = shutil.make_archive(zip_base_name, 'zip', download_root_dir)
        progress(1, desc="Download ready!")
        gr.Info("ZIP file created successfully!")
        return gr.update(value=zip_path, visible=True)
    finally:
        # The ZIP lives outside this directory, so the downloads can go.
        shutil.rmtree(download_root_dir, ignore_errors=True)

# --- Gradio UI Layout ---
# Builds the whole interface: a token sidebar plus two tabs (repository
# management and bulk ZIP download), then wires the event handlers.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue"), title="Hugging Face Hub Toolkit") as demo:
    # State management
    hf_token_state = gr.State(None)            # written by handle_token_change
    manage_repo_type_state = gr.State(None)    # repo type last listed in the Manage tab
    download_repo_type_state = gr.State(None)  # repo type last listed in the Download tab
    selected_repo = gr.State(None)             # repo id picked in the Manage dropdown

    gr.Markdown("# Hugging Face Hub Toolkit")
    with gr.Sidebar():
        hf_token = gr.Textbox(label="Hugging Face API Token", type="password", placeholder="hf_...")
        whoami_output = gr.JSON(label="Authenticated User", visible=False)

    with gr.Tabs():
        with gr.TabItem("Manage Repositories"):
            # Define all components first
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### 1. Select a Repository")
                    author_input = gr.Textbox(label="Author (Username or Org)")
                    with gr.Row():
                        # One button per repo type, in the same order as repo_types below.
                        manage_buttons = [gr.Button(f"List {label}") for label in ["Spaces", "Models", "Datasets"]]
                    manage_repo_dropdown = gr.Dropdown(label="Select a Repository", interactive=True)
                with gr.Column(scale=2):
                    # Hidden until a repository is selected (see on_manage_repo_select).
                    with gr.Column(visible=False) as action_panel:
                        gr.Markdown("### 2. Choose an Action")
                        # Created non-interactive; handle_token_change toggles these buttons.
                        manage_files_btn = gr.Button("Manage Files", interactive=False)
                        delete_repo_btn = gr.Button("Delete This Repo", variant="stop", interactive=False)
                        archive_repo_name = gr.Textbox(label="Archive",placeholder="my_archive, will append /repo_name")
                        archive_repo_btn = gr.Button("Send to Archive", variant="secondary", interactive=False)
                    # Hidden until "Manage Files" is clicked (see show_files_and_load_first).
                    with gr.Column(visible=False) as editor_panel:
                        gr.Markdown("### 3. Edit Files")
                        file_selector = gr.Dropdown(label="Select File", interactive=True)
                        # Only one of the two editors below is shown at a time,
                        # depending on whether the selected file is tabular.
                        code_editor = gr.Code(label="File Content", interactive=True)
                        dataframe_editor = gr.Dataframe(label="Dataset Editor", interactive=True, visible=False, wrap=True) 
                        commit_message_input = gr.Textbox(label="Commit Message", placeholder="e.g., Update README.md")
                        commit_btn = gr.Button("Commit Changes", variant="primary", interactive=False)
            # Now, attach event handlers
            # Singular type names, index-aligned with the button lists; also
            # reused by the download tab's loop further down.
            repo_types = ["space", "model", "dataset"]
            for i, btn in enumerate(manage_buttons):
                # The repo type is passed to the handler as a constant State input.
                btn.click(fn=list_repos_for_management,
                          inputs=[hf_token_state, author_input, gr.State(repo_types[i])],
                          outputs=[manage_repo_type_state, manage_repo_dropdown, action_panel, editor_panel])

        with gr.TabItem("Bulk Download (ZIP)"):
            # Define all components first
            gr.Markdown("## Download Multiple Repositories as a ZIP")
            download_author_input = gr.Textbox(label="Author (Username or Org)")
            with gr.Row():
                download_buttons = [gr.Button(f"List {label}") for label in ["Spaces", "Models", "Datasets"]]
            download_repo_dropdown = gr.Dropdown(label="Select Repositories", multiselect=True, interactive=True)
            download_btn = gr.Button("Download Selected as ZIP", variant="primary")
            download_output_file = gr.File(label="Your Downloaded ZIP File", visible=False)
            # Now, attach event handlers
            for i, btn in enumerate(download_buttons):
                btn.click(fn=list_repos_for_download,
                          inputs=[hf_token_state, download_author_input, gr.State(repo_types[i])],
                          outputs=[download_repo_type_state, download_repo_dropdown])

    # --- Global and Cross-Tab Event Handlers ---
    # NOTE: handle_token_change must return one value per entry of this
    # outputs list (8 in total), in exactly this order.
    hf_token.change(fn=handle_token_change, inputs=hf_token,
                    outputs=[hf_token_state, manage_files_btn, archive_repo_btn, delete_repo_btn, commit_btn, author_input, download_author_input, whoami_output])

    # Selecting a repo reveals the action panel and records the id in selected_repo.
    manage_repo_dropdown.select(fn=on_manage_repo_select, inputs=manage_repo_dropdown, outputs=[action_panel, selected_repo])
    
    manage_files_btn.click(fn=show_files_and_load_first,
                           inputs=[hf_token_state, manage_repo_dropdown, manage_repo_type_state],
                           outputs=[editor_panel, file_selector, code_editor, dataframe_editor]) # Added dataframe_editor

    
    # Archive and delete read the repo id from selected_repo and write their
    # results to the dropdown and both panels.
    archive_repo_btn.click(fn=archive_repo, inputs=[hf_token_state, selected_repo, archive_repo_name, manage_repo_type_state],
                          outputs=[manage_repo_dropdown, action_panel, editor_panel],
                          #js="() => confirm('Are you sure you want to archive this repo?')"
                          )
    
    delete_repo_btn.click(fn=delete_repo, inputs=[hf_token_state, selected_repo, manage_repo_type_state],
                          outputs=[manage_repo_dropdown, action_panel, editor_panel],
                          js="() => confirm('Are you sure you want to permanently delete this repository?')")

    # Changing the selected file reloads the matching editor.
    file_selector.change(fn=load_file_content_for_editor,
                         inputs=[hf_token_state, manage_repo_dropdown, manage_repo_type_state, file_selector],
                         outputs=[code_editor, dataframe_editor]) # Added dataframe_editor


    # No outputs: commit_file only reports through Gradio notifications.
    commit_btn.click(fn=commit_file,
                     inputs=[hf_token_state, manage_repo_dropdown, manage_repo_type_state, file_selector, 
                             code_editor, dataframe_editor, # Added dataframe_editor
                             commit_message_input])


    download_btn.click(fn=download_repos_as_zip,
                       inputs=[hf_token_state, download_repo_dropdown, download_repo_type_state],
                       outputs=[download_output_file])

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch(debug=True)