Spaces:
Sleeping
Sleeping
| from concurrent.futures import ThreadPoolExecutor, as_completed | |
| import gradio as gr | |
| from huggingface_hub import HfApi | |
def format_size(size_bytes: int | None) -> str:
    """Render a byte count as a short human-readable string.

    Args:
        size_bytes: Size in bytes, or ``None`` when the size is unknown.

    Returns:
        ``"N/A"`` for ``None``; otherwise the value scaled by powers of 1024
        and formatted with one decimal place plus a unit from B up to PB.
    """
    if size_bytes is None:
        return "N/A"

    units = ("B", "KB", "MB", "GB", "TB")
    value = size_bytes
    step = 0
    # Scale down until the magnitude fits under 1024 or the named units run
    # out. Written as ``not ... < 1024`` so values that fail the comparison
    # keep being scaled rather than stopping early.
    while step < len(units) and not abs(value) < 1024:
        value /= 1024
        step += 1

    # Anything that is still too large after TB is reported in PB.
    suffix = units[step] if step < len(units) else "PB"
    return f"{value:.1f} {suffix}"
def fetch_repos(
    repo_type: str, username: str, token: str
) -> list[dict]:
    """List one user's repos of a given type, with storage size attached.

    Args:
        repo_type: ``"model"`` or ``"dataset"``; any other value is treated
            as a request for spaces.
        username: Author whose repos are listed.
        token: Access token handed to ``HfApi``.

    Returns:
        One dict per repo with the keys ``id``, ``private``, ``size_bytes``,
        ``size`` (formatted via ``format_size``) and ``last_modified``
        (stringified, ``""`` when absent). ``size_bytes`` is ``None`` when
        the per-repo lookup fails or carries no ``usedStorage`` value.
    """
    api = HfApi(token=token)

    # Pick the listing call for this repo type; spaces are the fallback.
    listers = {"model": api.list_models, "dataset": api.list_datasets}
    list_fn = listers.get(repo_type, api.list_spaces)
    items = list(list_fn(author=username, expand=["private", "lastModified"]))

    # The listing above only asks for `private` and `lastModified`, so the
    # storage figure is fetched with one `repo_info` call per repo.
    info_repo_type = None if repo_type == "model" else repo_type

    def _lookup_size(repo_id: str) -> tuple[str, int | None]:
        # Best effort: any failure just leaves the size unknown.
        try:
            info = api.repo_info(repo_id, repo_type=info_repo_type)
            return repo_id, getattr(info, "usedStorage", None)
        except Exception:
            return repo_id, None

    # Run the lookups on a small thread pool and collect them into a map.
    with ThreadPoolExecutor(max_workers=8) as pool:
        size_by_id: dict[str, int | None] = dict(
            pool.map(_lookup_size, (item.id for item in items))
        )

    def _as_record(item) -> dict:
        used = size_by_id.get(item.id)
        return {
            "id": item.id,
            "private": getattr(item, "private", False) or False,
            "size_bytes": used,
            "size": format_size(used),
            "last_modified": str(getattr(item, "last_modified", "") or ""),
        }

    return [_as_record(item) for item in items]
def repos_to_df(repos: list[dict]) -> list[list]:
    """Convert repo records into rows for the selection table.

    Args:
        repos: Records with the keys ``id``, ``private``, ``size`` and
            ``last_modified``.

    Returns:
        One row per record, in input order:
        ``[selected, id, visibility, size, last_modified]``. ``selected``
        always starts as ``False`` and visibility is ``"Private"`` or
        ``"Public"``.
    """
    rows: list[list] = []
    for repo in repos:
        repo_id = repo["id"]
        visibility = "Private" if repo["private"] else "Public"
        rows.append(
            [False, repo_id, visibility, repo["size"], repo["last_modified"]]
        )
    return rows
def apply_filters(
    repos: list[dict], visibility: str, sort_by: str, search: str = ""
) -> list[list]:
    """Filter and order repo records, then turn them into table rows.

    Args:
        repos: Repo records (see ``repos_to_df`` for the keys used).
        visibility: ``"Public"`` or ``"Private"`` to restrict the result;
            any other value keeps every repo.
        sort_by: ``"Name"`` (case-insensitive, ascending), ``"Size"``
            (largest first, unknown sizes counted as 0) or
            ``"Last Modified"`` (descending by the stored string); any other
            value leaves the order as it is.
        search: Case-insensitive substring that the repo id must contain;
            empty means no search filter.

    Returns:
        Rows as produced by ``repos_to_df`` for the remaining records.
    """
    rows = repos

    if search:
        needle = search.lower()
        rows = [r for r in rows if needle in r["id"].lower()]

    if visibility in ("Public", "Private"):
        want_private = visibility == "Private"
        rows = [r for r in rows if bool(r["private"]) == want_private]

    # sort key and "descending?" flag for each supported ordering
    orderings = {
        "Name": (lambda r: r["id"].lower(), False),
        "Size": (lambda r: r["size_bytes"] or 0, True),
        "Last Modified": (lambda r: r["last_modified"], True),
    }
    ordering = orderings.get(sort_by)
    if ordering is not None:
        sort_key, descending = ordering
        rows = sorted(rows, key=sort_key, reverse=descending)

    return repos_to_df(rows)
def build_and_wire_tab(repo_type: str):
    """Create the widgets for one repo-type tab and wire up their events.

    Intended to be called while a ``gr.Blocks`` / ``gr.Tab`` context is
    open, since it instantiates Gradio components and registers listeners.

    Args:
        repo_type: ``"model"``, ``"dataset"`` or ``"space"``; forwarded to
            ``fetch_repos`` and used to pick the ``repo_type`` argument for
            ``HfApi.delete_repo`` (``None`` for models).

    Returns:
        A ``(load_fn, outputs)`` pair: the loader callback for this tab and
        the list of components it writes to
        (``[repos_state, dataframe, confirm_panel, confirm_md]``), so the
        caller can also run it on page load.
    """
    # ---- layout ---------------------------------------------------------
    search_box = gr.Textbox(
        placeholder="Filter by name...", label="Search", lines=1
    )
    with gr.Row():
        visibility_radio = gr.Radio(
            choices=["All", "Public", "Private"],
            value="All",
            label="Visibility",
        )
        sort_dropdown = gr.Dropdown(
            choices=["Name", "Size", "Last Modified"],
            value="Name",
            label="Sort by",
        )
        refresh_btn = gr.Button("Refresh", variant="secondary")
    with gr.Row():
        select_all_btn = gr.Button("Select All", size="sm")
        deselect_all_btn = gr.Button("Deselect All", size="sm")
        delete_btn = gr.Button(
            "Delete Selected", variant="stop", size="sm"
        )
    dataframe = gr.DataFrame(
        headers=["Select", "Repository", "Visibility", "Size", "Last Modified"],
        datatype=["bool", "str", "str", "str", "str"],
        column_widths=["5%", "45%", "12%", "18%", "20%"],
        interactive=True,
        static_columns=[1, 2, 3, 4],
        show_search="none",
        type="array",
        col_count=(5, "fixed"),
    )
    # Full, unfiltered repo records for this tab.
    repos_state = gr.State([])
    # Repo ids the user was shown in the confirmation panel. Deletion works
    # from this list, not from the live checkboxes, so what gets deleted is
    # exactly what was confirmed even if the table changes in between.
    pending_state = gr.State([])
    confirm_panel = gr.Column(visible=False)
    with confirm_panel:
        confirm_md = gr.Markdown()
        with gr.Row():
            yes_btn = gr.Button("Yes, Delete", variant="stop")
            cancel_btn = gr.Button("Cancel", variant="secondary")

    # ---- callbacks ------------------------------------------------------
    def load_repos(
        profile: gr.OAuthProfile | None,
        oauth_token: gr.OAuthToken | None,
    ):
        """Fetch the user's repos; returns (records, rows, panel, message)."""
        if profile is None or oauth_token is None:
            gr.Info("Please log in first.")
            return [], [], gr.update(visible=False), ""
        repos = fetch_repos(repo_type, profile.username, oauth_token.token)
        # NOTE(review): a reload always shows every repo sorted by name; the
        # current search/visibility/sort widgets are not consulted here.
        df = repos_to_df(
            sorted(repos, key=lambda r: r["id"].lower())
        )
        return repos, df, gr.update(visible=False), ""

    def filter_repos(repos, visibility, sort_by, search):
        """Rebuild the table rows from the stored records and the filters."""
        return apply_filters(repos, visibility, sort_by, search)

    def _with_selection(df_data, checked):
        # New rows with the checkbox column forced to `checked`.
        if not df_data:
            return []
        return [[checked, *row[1:]] for row in df_data]

    def select_all(df_data):
        """Tick every row currently shown."""
        return _with_selection(df_data, True)

    def deselect_all(df_data):
        """Untick every row currently shown."""
        return _with_selection(df_data, False)

    def confirm_delete(df_data):
        """Show the confirmation panel and remember which ids it lists."""
        if not df_data:
            gr.Warning("No repos loaded.")
            return gr.update(visible=False), "", []
        selected = [row[1] for row in df_data if row[0]]
        if not selected:
            gr.Warning("No repos selected.")
            return gr.update(visible=False), "", []
        repo_list = "\n".join(f"- `{r}`" for r in selected)
        msg = (
            f"### Are you sure you want to delete {len(selected)} "
            f"repo(s)?\n\nThis action is **irreversible**.\n\n{repo_list}"
        )
        return gr.update(visible=True), msg, selected

    def execute_delete(
        df_data,
        repos,
        pending,
        profile: gr.OAuthProfile | None,
        oauth_token: gr.OAuthToken | None,
    ):
        """Delete the confirmed repos and drop them from state and table."""
        df_data = df_data or []
        hidden = gr.update(visible=False)
        if profile is None or oauth_token is None:
            gr.Info("Please log in first.")
            return repos, df_data, hidden, "", []
        if not pending:
            return repos, df_data, hidden, "", []

        api = HfApi(token=oauth_token.token)
        rt = None if repo_type == "model" else repo_type
        deleted = set()
        for repo_id in pending:
            try:
                api.delete_repo(repo_id=repo_id, repo_type=rt)
            except Exception as e:
                # Report the failure and keep going with the remaining repos.
                gr.Warning(f"Failed to delete {repo_id}: {e}")
            else:
                deleted.add(repo_id)
        if deleted:
            gr.Info(f"Deleted {len(deleted)} repo(s).")

        new_repos = [r for r in repos if r["id"] not in deleted]
        # Surviving rows are rebuilt unticked rather than mutated in place.
        new_df = [
            [False, *row[1:]] for row in df_data if row[1] not in deleted
        ]
        return new_repos, new_df, hidden, "", []

    def cancel_delete():
        """Hide the confirmation panel and forget the pending ids."""
        return gr.update(visible=False), "", []

    # ---- event wiring ---------------------------------------------------
    refresh_btn.click(
        fn=load_repos,
        inputs=[],
        outputs=[repos_state, dataframe, confirm_panel, confirm_md],
    )

    filter_inputs = [repos_state, visibility_radio, sort_dropdown, search_box]
    for control in (search_box, visibility_radio, sort_dropdown):
        control.change(
            fn=filter_repos, inputs=filter_inputs, outputs=[dataframe]
        )

    select_all_btn.click(
        fn=select_all, inputs=[dataframe], outputs=[dataframe]
    )
    deselect_all_btn.click(
        fn=deselect_all, inputs=[dataframe], outputs=[dataframe]
    )

    delete_btn.click(
        fn=confirm_delete,
        inputs=[dataframe],
        outputs=[confirm_panel, confirm_md, pending_state],
    )
    yes_btn.click(
        fn=execute_delete,
        inputs=[dataframe, repos_state, pending_state],
        outputs=[
            repos_state,
            dataframe,
            confirm_panel,
            confirm_md,
            pending_state,
        ],
    )
    cancel_btn.click(
        fn=cancel_delete,
        inputs=[],
        outputs=[confirm_panel, confirm_md, pending_state],
    )

    return load_repos, [repos_state, dataframe, confirm_panel, confirm_md]
# Assemble the app: header, login button, and one tab per repo type.
with gr.Blocks(title="HF Cleaner", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        "# HF Cleaner\n\n"
        "List your Hugging Face models, datasets, and spaces "
        "— filter by visibility, sort by size, and bulk delete "
        "selected repos."
    )
    login_btn = gr.LoginButton()

    # Each tab hands back (loader, outputs); collect them so every tab can
    # be populated when the page loads.
    tab_loaders = []
    with gr.Tabs():
        for tab_label, tab_repo_type in (
            ("Models", "model"),
            ("Datasets", "dataset"),
            ("Spaces", "space"),
        ):
            with gr.Tab(tab_label):
                tab_loaders.append(build_and_wire_tab(tab_repo_type))

    for load_fn, outputs in tab_loaders:
        demo.load(fn=load_fn, inputs=[], outputs=outputs)


if __name__ == "__main__":
    demo.launch(ssr_mode=False)