# Alireza Aminzadeh
# Upload folder using huggingface_hub
# f91e1ac verified
"""
Content-Freshness-Scorer Space. Self-contained: downloads repo for model files only.
"""
import traceback
from pathlib import Path
import gradio as gr
# Hugging Face Hub repository id passed to snapshot_download() in run(); the
# model artifacts are read from its "models" sub-directory.
REPO_ID: str = "syeedalireza/content-freshness-scorer"
# Half-life of the exponential freshness decay, in days: content this old
# gets a freshness score of 0.5.
FRESHNESS_HALFLIFE_DAYS: float = 365.0
# Single-slot cache of the deserialized artifacts, keyed by the snapshot's
# model directory. A new Hub revision resolves to a different directory, so a
# repo update still triggers a reload while repeat requests skip the disk I/O.
_ARTIFACTS = {}


def _load_artifacts(model_dir):
    """Return ``(encoder, query_embeddings)`` for *model_dir*, loading at most once per directory."""
    import joblib

    key = str(model_dir)
    cached = _ARTIFACTS.get(key)
    if cached is None:
        # NOTE(security): joblib.load unpickles the files, which can execute
        # arbitrary code. Only point REPO_ID at a repository you trust.
        cached = (
            joblib.load(model_dir / "encoder.joblib"),
            joblib.load(model_dir / "query_embeddings.joblib"),
        )
        # Keep only the most recent snapshot's artifacts in memory.
        _ARTIFACTS.clear()
        _ARTIFACTS[key] = cached
    return cached


def _freshness_score(days: float, half_life_days: float) -> float:
    """Return the exponential-decay freshness for an age of *days*.

    The score halves every *half_life_days*. Negative ages (and NaN, which
    fails the ``>= 0`` comparison) are treated as fully fresh (1.0).
    """
    if days >= 0:
        return min(1.0, 0.5 ** (days / half_life_days))
    return 1.0


def run(content, days_old) -> str:
    """Score a piece of content for relevance, freshness and refresh priority.

    Args:
        content: Page text to score. ``None``, empty, or whitespace-only input
            short-circuits with a prompt message.
        days_old: Days since the content was last modified; ``None`` is
            treated as 0.

    Returns:
        A Markdown-formatted string with the three scores, or a human-readable
        error message (including the traceback) if downloading the repository
        or scoring fails. Exceptions are never propagated to the caller.
    """
    text = (content or "").strip()
    if not text:
        return "Enter content text."

    try:
        # Imported lazily so an import failure is reported in the UI
        # instead of crashing the app at start-up.
        from huggingface_hub import snapshot_download

        model_dir = Path(snapshot_download(REPO_ID)) / "models"
    except Exception:
        return f"Download error:\n{traceback.format_exc()}"

    required = ("encoder.joblib", "query_embeddings.joblib")
    if not all((model_dir / name).exists() for name in required):
        return "Run train.py first (builds query embeddings from target_queries.txt) and re-upload."

    try:
        import numpy as np

        encoder, query_emb = _load_artifacts(model_dir)
        content_emb = encoder.encode([text])
        # Relevance is the best dot-product match against any query embedding.
        # NOTE(review): this equals cosine similarity only if both embeddings
        # are L2-normalized -- confirm against train.py.
        relevance = float(np.dot(content_emb, query_emb.T).max(axis=1)[0])

        days = float(days_old) if days_old is not None else 0.0
        freshness = _freshness_score(days, FRESHNESS_HALFLIFE_DAYS)
        # Equal-weight blend: relevant but stale content gets the highest priority.
        refresh_priority = 0.5 * relevance + 0.5 * (1 - freshness)
        return (
            f"**Relevance (vs queries):** {relevance:.3f}\n"
            f"**Freshness score:** {freshness:.3f}\n"
            f"**Refresh priority:** {refresh_priority:.3f}"
        )
    except Exception:
        return f"Error:\n{traceback.format_exc()}"
# Gradio UI: two inputs (content text, age in days) wired to run(), with the
# resulting score report shown in a single text box.
demo = gr.Interface(
    title="Content-Freshness-Scorer",
    description="Relevance and freshness. Model: syeedalireza/content-freshness-scorer",
    fn=run,
    # Order matches run(content, days_old).
    inputs=[
        gr.Textbox(lines=5, label="Content", placeholder="Paste page content..."),
        gr.Number(value=0, label="Days since last modified"),
    ],
    outputs=gr.Textbox(label="Scores"),
)

# Start the web server only when executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()