backlink-quality-scorer / inference.py
Alireza Aminzadeh
Upload folder using huggingface_hub
585672a verified
"""
Score backlinks: predicted quality and/or risk.
"""
import argparse
from pathlib import Path
import pandas as pd
import joblib
from config import MODEL_DIR, FEATURE_COLUMNS, TARGET_QUALITY, TARGET_RISK
def main(argv=None):
    """Score a CSV of backlinks with the saved quality and/or risk models.

    Reads the input CSV, derives ``anchor_length`` from ``anchor_text`` when
    only the latter is present, builds the feature matrix, applies whichever
    of ``quality_model.joblib`` / ``risk_model.joblib`` exist in ``MODEL_DIR``
    and writes the input rows plus the prediction columns to the output CSV.

    Args:
        argv: Optional list of command-line arguments (without the program
            name). ``None`` (the default) makes argparse read ``sys.argv``,
            which is the behaviour of a plain ``main()`` call.

    Raises:
        FileNotFoundError: If the input CSV does not exist.
        ValueError: If no feature columns are available, or if a model is
            about to be applied while columns from the saved feature list
            are absent from the input.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", default="data/backlinks.csv")
    parser.add_argument("--output", default="scored_links.csv")
    args = parser.parse_args(argv)

    input_path = Path(args.input)
    if not input_path.exists():
        raise FileNotFoundError(f"Input file not found: {args.input}. Run from project root or use an absolute path.")

    df = pd.read_csv(input_path)
    if "anchor_text" in df.columns and "anchor_length" not in df.columns:
        df["anchor_length"] = df["anchor_text"].fillna("").str.len()

    # NOTE: joblib.load unpickles arbitrary objects; only point MODEL_DIR at
    # artifacts you produced or otherwise trust.
    saved_features_path = MODEL_DIR / "feature_columns.joblib"
    if saved_features_path.exists():
        # list() so an array-like pickle cannot break the truthiness check.
        features = list(joblib.load(saved_features_path))
    else:
        features = [c for c in FEATURE_COLUMNS if c in df.columns]
    if not features:
        raise ValueError(f"Input must contain at least one of {FEATURE_COLUMNS}")

    present = [c for c in features if c in df.columns]
    # Only a saved feature list can name columns the input lacks; the
    # fallback list above is already restricted to the input's columns.
    missing = [c for c in features if c not in df.columns]
    X = df[present].fillna(0)

    out = df.copy()
    # NOTE(review): the risk column name is hard-coded while TARGET_RISK is
    # imported but unused — confirm this is intended.
    model_outputs = (
        ("quality_model.joblib", f"pred_{TARGET_QUALITY}"),
        ("risk_model.joblib", "pred_risk_label"),
    )
    for model_file, column in model_outputs:
        model_path = MODEL_DIR / model_file
        if not model_path.exists():
            continue
        if missing:
            # Fail with an actionable message instead of handing the
            # estimator a matrix that is narrower than the saved feature list.
            raise ValueError(
                f"Input is missing feature columns required by {model_file}: {missing}"
            )
        out[column] = joblib.load(model_path).predict(X)

    Path(args.output).parent.mkdir(parents=True, exist_ok=True)
    out.to_csv(args.output, index=False)
    print(f"Saved to {args.output}")
# Run the scorer only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()