| """ | |
| Content-Freshness-Scorer Space. Self-contained: downloads repo for model files only. | |
| """ | |
| import traceback | |
| from pathlib import Path | |
| import gradio as gr | |
# Hugging Face Hub repository that the model files are downloaded from.
REPO_ID = "syeedalireza/content-freshness-scorer"

# Half-life of the freshness decay: content this many days old scores 0.5.
FRESHNESS_HALFLIFE_DAYS = 365.0
# Holds the loaded (encoder, query_embeddings) pair under the "artifacts" key.
# Filled only after a successful load, so a failed attempt is retried on the
# next request while successful loads are never repeated.
_MODEL_CACHE = {}


def run(content, days_old):
    """Score one piece of content and return a Markdown-formatted report.

    The report contains three numbers:

    * relevance: the largest dot product between the content embedding and
      the stored query embeddings,
    * freshness: ``0.5 ** (days / FRESHNESS_HALFLIFE_DAYS)`` capped at 1.0
      (negative ages count as fully fresh),
    * refresh priority: ``0.5 * relevance + 0.5 * (1 - freshness)``.

    Args:
        content: Text to score. ``None``, empty or whitespace-only input is
            rejected with a prompt message.
        days_old: Age of the content in days; ``None`` is treated as 0.

    Returns:
        The report string on success, otherwise a human-readable message:
        "Enter content text." for blank input, a "Download error:" or
        "Error:" message with the traceback when downloading, loading or
        scoring fails, or a hint to run ``train.py`` when the model files
        are missing from the downloaded snapshot.
    """
    text = (content or "").strip()
    if not text:
        return "Enter content text."

    artifacts = _MODEL_CACHE.get("artifacts")
    if artifacts is None:
        try:
            from huggingface_hub import snapshot_download

            model_dir = Path(snapshot_download(REPO_ID)) / "models"
        except Exception:
            return f"Download error:\n{traceback.format_exc()}"

        encoder_path = model_dir / "encoder.joblib"
        query_path = model_dir / "query_embeddings.joblib"
        if not (encoder_path.exists() and query_path.exists()):
            return "Run train.py first (builds query embeddings from target_queries.txt) and re-upload."

        try:
            import joblib

            # NOTE(security): joblib.load unpickles the file and can execute
            # arbitrary code; only point REPO_ID at a repository you trust.
            artifacts = (joblib.load(encoder_path), joblib.load(query_path))
        except Exception:
            return f"Error:\n{traceback.format_exc()}"
        _MODEL_CACHE["artifacts"] = artifacts

    encoder, query_emb = artifacts
    try:
        import numpy as np

        content_emb = encoder.encode([text])
        # assumes query_emb is 2-D (n_queries, dim) so the row-wise max picks
        # the best-matching query for the single content row -- TODO confirm
        relevance = float(np.dot(content_emb, query_emb.T).max(axis=1)[0])

        days = float(days_old) if days_old is not None else 0.0
        if days >= 0:
            freshness = min(1.0, 0.5 ** (days / FRESHNESS_HALFLIFE_DAYS))
        else:
            # A negative (or NaN) age is treated as "just modified".
            freshness = 1.0

        refresh_priority = 0.5 * relevance + 0.5 * (1 - freshness)
        return (
            f"**Relevance (vs queries):** {relevance:.3f}\n"
            f"**Freshness score:** {freshness:.3f}\n"
            f"**Refresh priority:** {refresh_priority:.3f}"
        )
    except Exception:
        return f"Error:\n{traceback.format_exc()}"
# Gradio front end: a content text box and an age-in-days number are passed
# to run(), and the string it returns is shown in the "Scores" text box.
demo = gr.Interface(
    title="Content-Freshness-Scorer",
    description="Relevance and freshness. Model: syeedalireza/content-freshness-scorer",
    fn=run,
    inputs=[
        gr.Textbox(lines=5, label="Content", placeholder="Paste page content..."),
        gr.Number(value=0, label="Days since last modified"),
    ],
    outputs=gr.Textbox(label="Scores"),
)

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()