hf-cleaner / app.py
gabrielmbmb's picture
Add search bar to filter repos by name
42ea352
from concurrent.futures import ThreadPoolExecutor, as_completed
import gradio as gr
from huggingface_hub import HfApi
def format_size(size_bytes: int | None) -> str:
if size_bytes is None:
return "N/A"
for unit in ("B", "KB", "MB", "GB", "TB"):
if abs(size_bytes) < 1024:
return f"{size_bytes:.1f} {unit}"
size_bytes /= 1024
return f"{size_bytes:.1f} PB"
def fetch_repos(
    repo_type: str, username: str, token: str
) -> list[dict]:
    """List the repositories of one type owned by *username*.

    Args:
        repo_type: ``"model"`` or ``"dataset"``; any other value lists spaces.
        username: Author whose repositories are listed.
        token: Access token handed to ``HfApi``.

    Returns:
        One dict per repository with the keys ``id``, ``private``,
        ``size_bytes``, ``size`` (formatted) and ``last_modified`` (string).
        ``size_bytes`` is ``None`` when the size lookup failed or the
        attribute was absent.
    """
    client = HfApi(token=token)
    listing_kwargs = {"author": username, "expand": ["private", "lastModified"]}

    if repo_type == "model":
        lister = client.list_models
    elif repo_type == "dataset":
        lister = client.list_datasets
    else:
        lister = client.list_spaces
    listed = list(lister(**listing_kwargs))

    # repo_info is called with repo_type=None for models.
    info_repo_type = repo_type if repo_type != "model" else None

    def _get_size(repo_id: str) -> tuple[str, int | None]:
        # Best effort: any failure is reported as an unknown size.
        try:
            details = client.repo_info(repo_id, repo_type=info_repo_type)
            return repo_id, getattr(details, "usedStorage", None)
        except Exception:
            return repo_id, None

    # Fetch usedStorage per repo in parallel (not available via list endpoints)
    with ThreadPoolExecutor(max_workers=8) as pool:
        pending = [pool.submit(_get_size, entry.id) for entry in listed]
        sizes: dict[str, int | None] = dict(
            done.result() for done in as_completed(pending)
        )

    def _describe(entry) -> dict:
        used = sizes.get(entry.id)
        return {
            "id": entry.id,
            "private": getattr(entry, "private", False) or False,
            "size_bytes": used,
            "size": format_size(used),
            "last_modified": str(getattr(entry, "last_modified", "") or ""),
        }

    return [_describe(entry) for entry in listed]
def repos_to_df(repos: list[dict]) -> list[list]:
    """Convert repository dicts into table rows.

    Each row is ``[selected, id, visibility, size, last_modified]``; the
    leading selection flag always starts out as ``False``.
    """
    rows = []
    for repo in repos:
        rows.append(
            [
                False,
                repo["id"],
                "Private" if repo["private"] else "Public",
                repo["size"],
                repo["last_modified"],
            ]
        )
    return rows
def apply_filters(
    repos: list[dict], visibility: str, sort_by: str, search: str = ""
) -> list[list]:
    """Filter and sort repositories, then convert them to table rows.

    Args:
        repos: Repository dicts with at least the keys ``id``, ``private``,
            ``size_bytes`` and ``last_modified`` (plus whatever
            ``repos_to_df`` reads).
        visibility: ``"Public"`` or ``"Private"`` to restrict the result;
            any other value keeps both.
        sort_by: ``"Name"`` (ascending, case-insensitive), ``"Size"``
            (largest first, unknown sizes count as 0) or ``"Last Modified"``
            (descending string order). Any other value keeps the input order.
        search: Case-insensitive substring matched against the repo id.
            Surrounding whitespace is ignored and ``None`` is treated as
            an empty search.

    Returns:
        The rows produced by ``repos_to_df`` for the remaining repositories.
    """
    # Repo ids never contain whitespace, so a stray leading/trailing space in
    # the search box would otherwise hide every repository.
    needle = (search or "").strip().lower()

    filtered = repos
    if needle:
        filtered = [r for r in filtered if needle in r["id"].lower()]

    if visibility == "Public":
        filtered = [r for r in filtered if not r["private"]]
    elif visibility == "Private":
        filtered = [r for r in filtered if r["private"]]

    if sort_by == "Name":
        filtered = sorted(filtered, key=lambda r: r["id"].lower())
    elif sort_by == "Size":
        filtered = sorted(
            filtered, key=lambda r: r["size_bytes"] or 0, reverse=True
        )
    elif sort_by == "Last Modified":
        filtered = sorted(
            filtered, key=lambda r: r["last_modified"], reverse=True
        )

    return repos_to_df(filtered)
def build_and_wire_tab(repo_type: str):
    """Create the widgets for one repository-type tab and wire their events.

    In this file it is called from inside a ``gr.Tab`` context, once per
    repository type.

    Args:
        repo_type: ``"model"``, ``"dataset"`` or ``"space"``; forwarded to
            ``fetch_repos`` and used to pick the ``repo_type`` for deletion
            (``None`` is passed for models).

    Returns:
        A ``(load_repos, outputs)`` tuple: the loader callback and the list of
        components it updates (repo state, table, confirm panel, confirm
        message), so the caller can register it as a page-load handler.
    """
    search_box = gr.Textbox(
        placeholder="Filter by name...", label="Search", lines=1
    )
    with gr.Row():
        visibility_radio = gr.Radio(
            choices=["All", "Public", "Private"],
            value="All",
            label="Visibility",
        )
        sort_dropdown = gr.Dropdown(
            choices=["Name", "Size", "Last Modified"],
            value="Name",
            label="Sort by",
        )
        refresh_btn = gr.Button("Refresh", variant="secondary")
    with gr.Row():
        select_all_btn = gr.Button("Select All", size="sm")
        deselect_all_btn = gr.Button("Deselect All", size="sm")
        delete_btn = gr.Button(
            "Delete Selected", variant="stop", size="sm"
        )
    dataframe = gr.DataFrame(
        headers=["Select", "Repository", "Visibility", "Size", "Last Modified"],
        datatype=["bool", "str", "str", "str", "str"],
        column_widths=["5%", "45%", "12%", "18%", "20%"],
        interactive=True,
        static_columns=[1, 2, 3, 4],
        show_search="none",
        type="array",
        col_count=(5, "fixed"),
    )
    # Full, unfiltered repo list for this tab; the table shows a filtered view.
    repos_state = gr.State([])
    # Repo ids the user was shown in the confirmation message. Deletion acts
    # on this snapshot, not on whatever happens to be ticked when
    # "Yes, Delete" is clicked.
    pending_state = gr.State([])
    confirm_panel = gr.Column(visible=False)
    with confirm_panel:
        confirm_md = gr.Markdown()
        with gr.Row():
            yes_btn = gr.Button("Yes, Delete", variant="stop")
            cancel_btn = gr.Button("Cancel", variant="secondary")

    def load_repos(
        profile: gr.OAuthProfile | None,
        oauth_token: gr.OAuthToken | None,
    ):
        """Fetch the user's repos and reset the table and confirm panel."""
        if profile is None or oauth_token is None:
            gr.Info("Please log in first.")
            return [], [], gr.update(visible=False), ""
        repos = fetch_repos(repo_type, profile.username, oauth_token.token)
        df = repos_to_df(
            sorted(repos, key=lambda r: r["id"].lower())
        )
        return repos, df, gr.update(visible=False), ""

    def filter_repos(repos, visibility, sort_by, search):
        """Recompute the table rows from the stored repos and the controls."""
        return apply_filters(repos, visibility, sort_by, search)

    def select_all(df_data):
        """Tick the selection box of every visible row."""
        if not df_data:
            return []
        return [[True, *row[1:]] for row in df_data]

    def deselect_all(df_data):
        """Clear the selection box of every visible row."""
        if not df_data:
            return []
        return [[False, *row[1:]] for row in df_data]

    def confirm_delete(df_data):
        """Show the confirmation panel and snapshot the selected repo ids."""
        if not df_data:
            gr.Warning("No repos loaded.")
            return gr.update(visible=False), "", []
        selected = [row[1] for row in df_data if row[0]]
        if not selected:
            gr.Warning("No repos selected.")
            return gr.update(visible=False), "", []
        repo_list = "\n".join(f"- `{r}`" for r in selected)
        msg = (
            f"### Are you sure you want to delete {len(selected)} "
            f"repo(s)?\n\nThis action is **irreversible**.\n\n{repo_list}"
        )
        return gr.update(visible=True), msg, selected

    def execute_delete(
        df_data,
        repos,
        pending_ids,
        profile: gr.OAuthProfile | None,
        oauth_token: gr.OAuthToken | None,
    ):
        """Delete exactly the repos listed in the confirmation message."""
        hide = gr.update(visible=False)
        if profile is None or oauth_token is None:
            gr.Info("Please log in first.")
            return repos, df_data, hide, "", []
        if not pending_ids:
            return repos, df_data, hide, "", []
        api = HfApi(token=oauth_token.token)
        rt = None if repo_type == "model" else repo_type
        deleted = set()
        for repo_id in pending_ids:
            try:
                api.delete_repo(repo_id=repo_id, repo_type=rt)
            except Exception as e:
                # Keep going: one failure should not block the other deletions.
                gr.Warning(f"Failed to delete {repo_id}: {e}")
            else:
                deleted.add(repo_id)
        if deleted:
            gr.Info(f"Deleted {len(deleted)} repo(s).")
        new_repos = [r for r in repos if r["id"] not in deleted]
        # Drop deleted rows from the current view and clear all selections.
        new_df = [
            [False, *row[1:]]
            for row in (df_data or [])
            if row[1] not in deleted
        ]
        return new_repos, new_df, hide, "", []

    def cancel_delete():
        """Hide the confirmation panel and forget the pending selection."""
        return gr.update(visible=False), "", []

    # Wire events
    filter_inputs = [repos_state, visibility_radio, sort_dropdown, search_box]
    refresh_event = refresh_btn.click(
        fn=load_repos,
        inputs=[],
        outputs=[repos_state, dataframe, confirm_panel, confirm_md],
    )
    # load_repos always yields the full list sorted by name; re-apply the
    # active search/visibility/sort so the table matches the controls.
    refresh_event.then(
        fn=filter_repos, inputs=filter_inputs, outputs=[dataframe]
    )
    search_box.change(
        fn=filter_repos, inputs=filter_inputs, outputs=[dataframe]
    )
    visibility_radio.change(
        fn=filter_repos, inputs=filter_inputs, outputs=[dataframe]
    )
    sort_dropdown.change(
        fn=filter_repos, inputs=filter_inputs, outputs=[dataframe]
    )
    select_all_btn.click(
        fn=select_all, inputs=[dataframe], outputs=[dataframe]
    )
    deselect_all_btn.click(
        fn=deselect_all, inputs=[dataframe], outputs=[dataframe]
    )
    delete_btn.click(
        fn=confirm_delete,
        inputs=[dataframe],
        outputs=[confirm_panel, confirm_md, pending_state],
    )
    yes_btn.click(
        fn=execute_delete,
        inputs=[dataframe, repos_state, pending_state],
        outputs=[
            repos_state,
            dataframe,
            confirm_panel,
            confirm_md,
            pending_state,
        ],
    )
    cancel_btn.click(
        fn=cancel_delete,
        inputs=[],
        outputs=[confirm_panel, confirm_md, pending_state],
    )
    return load_repos, [repos_state, dataframe, confirm_panel, confirm_md]
# Assemble the app: a header, a login button, one tab per repository type,
# and a page-load handler for each tab's loader.
with gr.Blocks(title="HF Cleaner", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        "# HF Cleaner\n\n"
        "List your Hugging Face models, datasets, and spaces "
        "— filter by visibility, sort by size, and bulk delete "
        "selected repos."
    )
    login_btn = gr.LoginButton()

    tab_loaders = []
    with gr.Tabs():
        # Tab label paired with the repo type passed to build_and_wire_tab.
        for _tab_label, _tab_repo_type in (
            ("Models", "model"),
            ("Datasets", "dataset"),
            ("Spaces", "space"),
        ):
            with gr.Tab(_tab_label):
                load_fn, outputs = build_and_wire_tab(_tab_repo_type)
                tab_loaders.append((load_fn, outputs))

    # Register every tab's loader to run when the page loads.
    for load_fn, outputs in tab_loaders:
        demo.load(fn=load_fn, inputs=[], outputs=outputs)

if __name__ == "__main__":
    demo.launch(ssr_mode=False)