"""HF Cleaner: list, filter, sort and bulk-delete a user's Hugging Face repos."""

import logging
from concurrent.futures import ThreadPoolExecutor, as_completed

import gradio as gr
from huggingface_hub import HfApi

logger = logging.getLogger(__name__)


def format_size(size_bytes: int | None) -> str:
    """Render a byte count as a short human-readable string.

    Args:
        size_bytes: Size in bytes, or ``None`` when the size is unknown.

    Returns:
        ``"N/A"`` for ``None``; otherwise the value scaled by powers of 1024
        with one decimal place and a unit from ``B`` up to ``PB``.
    """
    if size_bytes is None:
        return "N/A"
    value = float(size_bytes)
    for unit in ("B", "KB", "MB", "GB", "TB"):
        if abs(value) < 1024:
            return f"{value:.1f} {unit}"
        value /= 1024
    return f"{value:.1f} PB"


def fetch_repos(repo_type: str, username: str, token: str) -> list[dict]:
    """List the repos of ``username`` for one repo type, with storage sizes.

    Args:
        repo_type: ``"model"`` or ``"dataset"``; any other value is listed
            through the spaces endpoint.
        username: Author whose repos are listed.
        token: Access token passed to ``HfApi``.

    Returns:
        One dict per repo with the keys ``id``, ``private``, ``size_bytes``,
        ``size`` (formatted) and ``last_modified`` (stringified, ``""`` when
        missing). ``size_bytes`` is ``None`` when the size lookup failed or
        the repo info carries no ``usedStorage`` attribute.
    """
    api = HfApi(token=token)
    expand = ["private", "lastModified"]
    if repo_type == "model":
        items = list(api.list_models(author=username, expand=expand))
    elif repo_type == "dataset":
        items = list(api.list_datasets(author=username, expand=expand))
    else:
        items = list(api.list_spaces(author=username, expand=expand))

    # Fetch usedStorage per repo in parallel (not available via list endpoints)
    rt = None if repo_type == "model" else repo_type

    def _get_size(repo_id: str) -> tuple[str, int | None]:
        # Best effort: one failing lookup must not break the whole listing,
        # but it is logged instead of being swallowed silently.
        try:
            info = api.repo_info(repo_id, repo_type=rt)
        except Exception as exc:
            logger.warning(
                "Could not fetch storage size for %s: %s", repo_id, exc
            )
            return repo_id, None
        return repo_id, getattr(info, "usedStorage", None)

    size_map: dict[str, int | None] = {}
    with ThreadPoolExecutor(max_workers=8) as pool:
        futures = [pool.submit(_get_size, repo.id) for repo in items]
        for fut in as_completed(futures):
            repo_id, size = fut.result()
            size_map[repo_id] = size

    results = []
    for repo in items:
        size_bytes = size_map.get(repo.id)
        results.append(
            {
                "id": repo.id,
                "private": getattr(repo, "private", False) or False,
                "size_bytes": size_bytes,
                "size": format_size(size_bytes),
                "last_modified": str(getattr(repo, "last_modified", "") or ""),
            }
        )
    return results


def repos_to_df(repos: list[dict]) -> list[list]:
    """Convert repo dicts into dataframe rows with an unchecked select box.

    Each row is ``[False, id, "Private" | "Public", size, last_modified]``.
    """
    return [
        [
            False,
            r["id"],
            "Private" if r["private"] else "Public",
            r["size"],
            r["last_modified"],
        ]
        for r in repos
    ]


def apply_filters(
    repos: list[dict], visibility: str, sort_by: str, search: str = ""
) -> list[list]:
    """Filter and sort repo dicts, then convert them to dataframe rows.

    Args:
        repos: Repo dicts as produced by ``fetch_repos``.
        visibility: ``"Public"`` or ``"Private"`` to restrict the list; any
            other value keeps every repo.
        sort_by: ``"Name"`` (case-insensitive, ascending), ``"Size"``
            (descending, unknown sizes count as 0) or ``"Last Modified"``
            (descending on the stored string); any other value keeps the
            incoming order.
        search: Case-insensitive substring matched against the repo id;
            empty means no search filter.

    Returns:
        Rows in the format produced by ``repos_to_df``.
    """
    filtered = repos
    if search:
        needle = search.lower()
        filtered = [r for r in filtered if needle in r["id"].lower()]

    if visibility == "Public":
        filtered = [r for r in filtered if not r["private"]]
    elif visibility == "Private":
        filtered = [r for r in filtered if r["private"]]

    if sort_by == "Name":
        filtered = sorted(filtered, key=lambda r: r["id"].lower())
    elif sort_by == "Size":
        filtered = sorted(
            filtered, key=lambda r: r["size_bytes"] or 0, reverse=True
        )
    elif sort_by == "Last Modified":
        filtered = sorted(
            filtered, key=lambda r: r["last_modified"], reverse=True
        )
    return repos_to_df(filtered)


def build_and_wire_tab(repo_type: str):
    """Create the widgets for one repo-type tab and wire their events.

    Must be called inside a ``gr.Blocks`` context.

    Args:
        repo_type: Repo type handled by this tab; passed to ``fetch_repos``
            and used to pick the ``repo_type`` argument for deletion.

    Returns:
        A ``(load_fn, outputs)`` pair: the loader callback for this tab and
        the list of components it writes to
        (``[repos_state, dataframe, confirm_panel, confirm_md]``).
    """
    search_box = gr.Textbox(
        placeholder="Filter by name...", label="Search", lines=1
    )
    with gr.Row():
        visibility_radio = gr.Radio(
            choices=["All", "Public", "Private"],
            value="All",
            label="Visibility",
        )
        sort_dropdown = gr.Dropdown(
            choices=["Name", "Size", "Last Modified"],
            value="Name",
            label="Sort by",
        )
        refresh_btn = gr.Button("Refresh", variant="secondary")

    with gr.Row():
        select_all_btn = gr.Button("Select All", size="sm")
        deselect_all_btn = gr.Button("Deselect All", size="sm")
        delete_btn = gr.Button(
            "Delete Selected", variant="stop", size="sm"
        )

    dataframe = gr.DataFrame(
        headers=["Select", "Repository", "Visibility", "Size", "Last Modified"],
        datatype=["bool", "str", "str", "str", "str"],
        column_widths=["5%", "45%", "12%", "18%", "20%"],
        interactive=True,
        static_columns=[1, 2, 3, 4],
        show_search="none",
        type="array",
        col_count=(5, "fixed"),
    )

    repos_state = gr.State([])
    # Repo ids shown in the confirmation panel. Deletion uses this snapshot
    # rather than the live checkboxes, so changing the selection or a filter
    # while the panel is open cannot alter what gets deleted.
    pending_state = gr.State([])

    confirm_panel = gr.Column(visible=False)
    with confirm_panel:
        confirm_md = gr.Markdown()
        with gr.Row():
            yes_btn = gr.Button("Yes, Delete", variant="stop")
            cancel_btn = gr.Button("Cancel", variant="secondary")

    _repo_type = repo_type

    def load_repos(
        profile: gr.OAuthProfile | None,
        oauth_token: gr.OAuthToken | None,
    ):
        """Fetch the logged-in user's repos and show them sorted by name."""
        if profile is None or oauth_token is None:
            gr.Info("Please log in first.")
            return [], [], gr.update(visible=False), ""
        repos = fetch_repos(_repo_type, profile.username, oauth_token.token)
        df = repos_to_df(
            sorted(repos, key=lambda r: r["id"].lower())
        )
        return repos, df, gr.update(visible=False), ""

    def filter_repos(repos, visibility, sort_by, search):
        """Rebuild the table rows from the cached repos and filter widgets."""
        return apply_filters(repos, visibility, sort_by, search)

    def select_all(df_data):
        """Return the rows with every select box checked."""
        if not df_data:
            return []
        return [[True, *row[1:]] for row in df_data]

    def deselect_all(df_data):
        """Return the rows with every select box unchecked."""
        if not df_data:
            return []
        return [[False, *row[1:]] for row in df_data]

    def confirm_delete(df_data):
        """Show the confirmation panel and snapshot the selected repo ids."""
        if not df_data:
            gr.Warning("No repos loaded.")
            return gr.update(visible=False), "", []
        selected = [row[1] for row in df_data if row[0]]
        if not selected:
            gr.Warning("No repos selected.")
            return gr.update(visible=False), "", []
        repo_list = "\n".join(f"- `{r}`" for r in selected)
        msg = (
            f"### Are you sure you want to delete {len(selected)} "
            f"repo(s)?\n\nThis action is **irreversible**.\n\n{repo_list}"
        )
        return gr.update(visible=True), msg, selected

    def execute_delete(
        confirmed_ids,
        df_data,
        repos,
        profile: gr.OAuthProfile | None,
        oauth_token: gr.OAuthToken | None,
    ):
        """Delete exactly the repos listed in the confirmation panel.

        Failures are reported per repo via ``gr.Warning`` and do not stop the
        remaining deletions. Deleted repos are removed from the cached list
        and the table; all remaining rows are unchecked.
        """
        if profile is None or oauth_token is None:
            gr.Info("Please log in first.")
            return repos, df_data, gr.update(visible=False), "", []

        if not confirmed_ids:
            return repos, df_data, gr.update(visible=False), "", []

        api = HfApi(token=oauth_token.token)
        rt = None if _repo_type == "model" else _repo_type

        deleted = set()
        for repo_id in confirmed_ids:
            try:
                api.delete_repo(repo_id=repo_id, repo_type=rt)
            except Exception as e:
                gr.Warning(f"Failed to delete {repo_id}: {e}")
            else:
                deleted.add(repo_id)

        if deleted:
            gr.Info(f"Deleted {len(deleted)} repo(s).")

        new_repos = [r for r in repos if r["id"] not in deleted]
        # Build fresh rows instead of mutating the input rows in place.
        new_df = [
            [False, *row[1:]]
            for row in (df_data or [])
            if row[1] not in deleted
        ]
        return new_repos, new_df, gr.update(visible=False), "", []

    def cancel_delete():
        """Hide the confirmation panel and drop the pending snapshot."""
        return gr.update(visible=False), "", []

    # Wire events
    refresh_btn.click(
        fn=load_repos,
        inputs=[],
        outputs=[repos_state, dataframe, confirm_panel, confirm_md],
    )

    filter_inputs = [repos_state, visibility_radio, sort_dropdown, search_box]
    search_box.change(
        fn=filter_repos, inputs=filter_inputs, outputs=[dataframe]
    )
    visibility_radio.change(
        fn=filter_repos, inputs=filter_inputs, outputs=[dataframe]
    )
    sort_dropdown.change(
        fn=filter_repos, inputs=filter_inputs, outputs=[dataframe]
    )

    select_all_btn.click(
        fn=select_all, inputs=[dataframe], outputs=[dataframe]
    )
    deselect_all_btn.click(
        fn=deselect_all, inputs=[dataframe], outputs=[dataframe]
    )

    delete_btn.click(
        fn=confirm_delete,
        inputs=[dataframe],
        outputs=[confirm_panel, confirm_md, pending_state],
    )
    yes_btn.click(
        fn=execute_delete,
        inputs=[pending_state, dataframe, repos_state],
        outputs=[
            repos_state,
            dataframe,
            confirm_panel,
            confirm_md,
            pending_state,
        ],
    )
    cancel_btn.click(
        fn=cancel_delete,
        inputs=[],
        outputs=[confirm_panel, confirm_md, pending_state],
    )

    return load_repos, [repos_state, dataframe, confirm_panel, confirm_md]


with gr.Blocks(title="HF Cleaner", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        "# HF Cleaner\n\n"
        "List your Hugging Face models, datasets, and spaces "
        "— filter by visibility, sort by size, and bulk delete "
        "selected repos."
    )
    login_btn = gr.LoginButton()

    # One tab per repo type; each contributes its loader and output list.
    tab_loaders = []
    with gr.Tabs():
        for tab_label, tab_repo_type in (
            ("Models", "model"),
            ("Datasets", "dataset"),
            ("Spaces", "space"),
        ):
            with gr.Tab(tab_label):
                tab_loaders.append(build_and_wire_tab(tab_repo_type))

    # Populate every tab when the page loads.
    for load_fn, outputs in tab_loaders:
        demo.load(fn=load_fn, inputs=[], outputs=outputs)


if __name__ == "__main__":
    demo.launch(ssr_mode=False)