# keyword-cluster-seo / inference.py
# Alireza Aminzadeh
# Upload folder using huggingface_hub
# 4d481d9 verified
"""
Assign cluster labels to the keywords in a CSV file and write the result to a new CSV.
"""
import argparse
from pathlib import Path
import pandas as pd
import joblib
from config import MODEL_DIR, KEYWORD_COLUMN
def main(argv=None):
    """Assign a cluster label to every keyword in a CSV file.

    Reads the CSV given by ``--input``, encodes the values of the
    ``KEYWORD_COLUMN`` column with the saved encoder, predicts a cluster for
    each row with the saved clusterer, and writes all rows plus a new
    ``cluster`` column to ``--output``.

    Args:
        argv: Optional list of command-line arguments. ``None`` (the default)
            lets argparse read ``sys.argv[1:]``, so calling ``main()`` with no
            arguments behaves as before.

    Raises:
        FileNotFoundError: If the input CSV does not exist, or if either model
            artifact (``encoder.joblib`` / ``clusterer.joblib``) is missing
            from ``MODEL_DIR``.
        ValueError: If the input CSV has no ``KEYWORD_COLUMN`` column.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", default="data/keywords.csv", help="CSV with keyword column")
    parser.add_argument("--output", default="data/clustered.csv", help="Output CSV with cluster column")
    args = parser.parse_args(argv)

    if not Path(args.input).exists():
        raise FileNotFoundError(
            f"Input file not found: {args.input}. Run from project root or use an absolute path."
        )

    # Both artifacts are loaded below, so verify both up front; otherwise a
    # missing clusterer surfaces as a bare joblib error without the hint.
    for artifact in ("encoder.joblib", "clusterer.joblib"):
        if not (MODEL_DIR / artifact).exists():
            raise FileNotFoundError(
                f"Run train.py first. No model in {MODEL_DIR} (missing {artifact})"
            )

    # NOTE: joblib.load unpickles arbitrary objects; only point MODEL_DIR at
    # artifacts from a trusted source.
    encoder = joblib.load(MODEL_DIR / "encoder.joblib")
    clusterer = joblib.load(MODEL_DIR / "clusterer.joblib")

    df = pd.read_csv(args.input)
    if KEYWORD_COLUMN not in df.columns:
        raise ValueError(f"CSV must have column: {KEYWORD_COLUMN}")

    # NOTE: astype(str) turns missing values into the literal string "nan",
    # so rows with an empty keyword are still encoded and clustered.
    keywords = df[KEYWORD_COLUMN].astype(str).tolist()
    if keywords:
        df["cluster"] = clusterer.predict(encoder.encode(keywords))
    else:
        # Header-only CSV: skip the models (which may reject zero samples)
        # and still emit a well-formed output file with the cluster column.
        df["cluster"] = pd.Series(dtype="int64")

    Path(args.output).parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(args.output, index=False)
    print(f"Saved to {args.output}")
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()