""" Score backlinks: predicted quality and/or risk. """ import argparse from pathlib import Path import pandas as pd import joblib from config import MODEL_DIR, FEATURE_COLUMNS, TARGET_QUALITY, TARGET_RISK def main(): parser = argparse.ArgumentParser() parser.add_argument("--input", default="data/backlinks.csv") parser.add_argument("--output", default="scored_links.csv") args = parser.parse_args() if not Path(args.input).exists(): raise FileNotFoundError(f"Input file not found: {args.input}. Run from project root or use an absolute path.") df = pd.read_csv(args.input) if "anchor_text" in df.columns and "anchor_length" not in df.columns: df["anchor_length"] = df["anchor_text"].fillna("").str.len() features = joblib.load(MODEL_DIR / "feature_columns.joblib") if (MODEL_DIR / "feature_columns.joblib").exists() else [c for c in FEATURE_COLUMNS if c in df.columns] if not features: raise ValueError(f"Input must contain at least one of {FEATURE_COLUMNS}") X = df[[c for c in features if c in df.columns]].fillna(0) out = df.copy() if (MODEL_DIR / "quality_model.joblib").exists(): model = joblib.load(MODEL_DIR / "quality_model.joblib") out[f"pred_{TARGET_QUALITY}"] = model.predict(X) if (MODEL_DIR / "risk_model.joblib").exists(): model = joblib.load(MODEL_DIR / "risk_model.joblib") out["pred_risk_label"] = model.predict(X) Path(args.output).parent.mkdir(parents=True, exist_ok=True) out.to_csv(args.output, index=False) print(f"Saved to {args.output}") if __name__ == "__main__": main()