Spaces:

amulyalakku
/

negotiation-env

Sleeping

App Files Files Community

amulyalakku committed 18 days ago

Commit

5925e2b

verified ·

1 Parent(s): 9b5b38d

feat: save baseline scores to outputs/baseline_scores.json

Browse files

Files changed (1) hide show

inference.py +256 -0

inference.py ADDED Viewed

	@@ -0,0 +1,256 @@

+"""
+Negotiation Environment — Baseline Inference Script
+MANDATORY stdout format:
+  [START] task=<task_name> env=<benchmark> model=<model_name>
+  [STEP]  step=<n> action=<action_str> reward=<0.00> done=<true|false> error=<msg|null>
+  [END]   success=<true|false> steps=<n> score=<score> rewards=<r1,r2,...,rn>
+Environment variables (each has a default; HF_TOKEN is needed for real LLM calls):
+    API_BASE_URL      — OpenAI-compatible endpoint
+                        Default: https://router.huggingface.co/v1  (HF Inference API)
+    MODEL_NAME        — model identifier
+                        Default: nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-FP8
+    HF_TOKEN          — Hugging Face API key (get free at huggingface.co/settings/tokens)
+    ENV_BASE_URL      — environment server (default http://localhost:8000)
+    LOCAL_IMAGE_NAME  — docker image name if using from_docker_image()
+NOTE on API key:
+    HF_TOKEN is required for the HF Inference API (free tier available).
+    Without it, LLM calls will fail and the agent falls back to 'accept' action.
+    Get your free token at: https://huggingface.co/settings/tokens
+Runtime: < 20 min on vcpu=2, memory=8gb.
+"""
+import json
+import os
+import sys
+from openai import OpenAI
+# ---------------------------------------------------------------------------
+# Config — read from environment variables
+# ---------------------------------------------------------------------------
+# Default to HF Inference API + Nemotron 3 Super (the Phase 2 judge model)
+API_BASE_URL     = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
+MODEL_NAME       = os.getenv("MODEL_NAME",   "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-FP8")
+HF_TOKEN         = os.getenv("HF_TOKEN",     "")
+ENV_BASE_URL     = os.getenv("ENV_BASE_URL", "http://localhost:8000")
+LOCAL_IMAGE_NAME = os.getenv("LOCAL_IMAGE_NAME", "")
+BENCHMARK = "negotiation_env"  # value of the env= field in the [START] line
+MAX_STEPS = 20  # safety cap per episode
+# Validate token — warn but don't crash. The warning is written to stderr so
+# it is never mixed into the mandatory [START]/[STEP]/[END] lines on stdout.
+if not HF_TOKEN:
+    print(
+        "WARNING: HF_TOKEN not set. LLM calls will fail. "
+        "Get a free token at https://huggingface.co/settings/tokens",
+        file=sys.stderr,
+        flush=True,
+    )
+# OpenAI-compatible client pointing at HF Inference API.
+# A placeholder key is substituted when HF_TOKEN is empty — presumably so the
+# client can still be constructed; calls made with it are expected to fail.
+client = OpenAI(
+    base_url=API_BASE_URL,
+    api_key=HF_TOKEN if HF_TOKEN else "hf-no-token",
+)
+# Put this file's directory first on sys.path before importing the local
+# negotiation_env package.
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+from negotiation_env import NegotiationEnv, NegotiationAction
+# ---------------------------------------------------------------------------
+# System prompt — tuned for Nemotron 3 Super (agentic, tool-use optimised)
+# ---------------------------------------------------------------------------
+# Sent as the first ("system") message of every episode by run_episode().
+# The JSON keys it asks the model to emit ("action_type", "offer",
+# "concession", "message") are the ones parse_action() reads back, so keep
+# the two in sync when editing either.
+SYSTEM_PROMPT = """You are an expert contract negotiator acting as a Buyer.
+Your goal: reach the best possible deal within budget and constraints.
+RULES:
+- Never exceed your budget (the price must_have value).
+- Respond ONLY with valid JSON — no prose, no markdown fences.
+- JSON must have "action_type". Add "offer", "concession", or "message" as needed.
+ACTION TYPES:
+  propose    — opening offer (requires "offer" dict)
+  counter    — counter seller's offer (requires "offer" dict)
+  concede    — give ground on one term (requires "concession": {"term":..., "value":...})
+  probe      — ask seller a question to infer their floor (requires "message")
+  accept     — accept seller's current standing offer
+  walk_away  — only if deal is truly impossible after many rounds
+OFFER KEYS (include only what you're proposing):
+  price (float), support_years (int), sla_percent (float),
+  source_escrow (bool), training_days (int), liability_cap (float),
+  termination_notice (int), price_lock_years (int), data_ownership (bool)
+STRATEGY:
+1. Open at 60-70% of budget. Never anchor at the floor.
+2. Hold firm early — do not concede >15% in round 1.
+3. Use leverage_score: if >0.6 hold firm, if <0.4 consider conceding.
+4. Watch estimated_seller_floor — use it to anchor your counter.
+5. Accept when seller price <= budget AND all must-haves satisfied.
+6. For task_4: use probe first to infer seller floor before offering.
+RESPOND WITH JSON ONLY. Examples:
+{"action_type":"propose","offer":{"price":42000,"support_years":2,"source_escrow":true}}
+{"action_type":"counter","offer":{"price":48000,"support_years":3,"data_ownership":true}}
+{"action_type":"probe","message":"What flexibility do you have on price for a 3-year deal?"}
+{"action_type":"accept"}
+"""
+def build_prompt(obs) -> str:
+    """Render one environment observation as the user-turn prompt for the LLM.
+    The first paragraph is a " | "-separated status line (round counters,
+    leverage, seller urgency and personality hint); the remaining paragraphs
+    carry the seller's message, the seller's offer and the buyer constraints
+    (both JSON-encoded), the estimated seller floor, and a closing instruction.
+    Paragraphs are separated by one blank line.
+    """
+    status_line = " | ".join(
+        (
+            f"Round {obs.round_number}/{obs.max_rounds}",
+            f"Remaining: {obs.rounds_remaining}",
+            f"Leverage: {obs.leverage_score:.2f}",
+            f"Seller urgency: {obs.seller_urgency_signal}",
+            f"Seller hint: {obs.seller_personality_hint}",
+        )
+    )
+    paragraphs = [
+        status_line,
+        f"Seller says: {obs.counterparty_response}",
+        f"Seller offer: {json.dumps(obs.counterparty_offer)}",
+        f"Est. seller floor: {obs.estimated_seller_floor}",
+        f"Your constraints: {json.dumps(obs.buyer_constraints)}",
+        "Respond with JSON action only.",
+    ]
+    return "\n\n".join(paragraphs)
+def parse_action(text: str) -> NegotiationAction:
+    """Parse raw LLM output into a NegotiationAction.
+    Markdown code fences are stripped, then the outermost ``{...}`` span is
+    decoded as JSON, so a reply that wraps the JSON object in prose or
+    reasoning text still parses. A decoded object that lacks "action_type"
+    defaults to "counter".
+    Args:
+        text: Raw completion text from the model (may be empty).
+    Returns:
+        The parsed action, or an "accept" action when the text contains no
+        usable JSON object or the action cannot be constructed from it.
+    """
+    try:
+        clean = text.strip()
+        for fence in ("```json", "```"):
+            clean = clean.replace(fence, "")
+        clean = clean.strip()
+        # Models frequently surround the JSON with extra text despite the
+        # prompt; keep only the outermost brace-delimited span when present.
+        start, end = clean.find("{"), clean.rfind("}")
+        if 0 <= start < end:
+            clean = clean[start:end + 1]
+        data = json.loads(clean)
+        if not isinstance(data, dict):
+            raise ValueError("LLM output is not a JSON object")
+        return NegotiationAction(
+            action_type=data.get("action_type", "counter"),
+            offer=data.get("offer"),
+            concession=data.get("concession"),
+            message=data.get("message"),
+        )
+    except Exception:
+        # Deliberate best-effort boundary: any decode or construction failure
+        # (including an empty reply after a failed LLM call) becomes "accept".
+        return NegotiationAction(action_type="accept")
+def action_str(action: NegotiationAction) -> str:
+    """Format an action as the compact one-line value of the [STEP] action= field.
+    The action type always comes first; the offer (compact JSON), the
+    concession (compact JSON prefixed with "concede:") and the message (first
+    60 characters, newlines turned into spaces, wrapped as msg:"...") follow
+    only when they are set and truthy. Pieces are joined with single spaces.
+    """
+    compact = (",", ":")
+    pieces = [action.action_type]
+    if action.offer:
+        pieces.append(json.dumps(action.offer, separators=compact))
+    if action.concession:
+        pieces.append("concede:" + json.dumps(action.concession, separators=compact))
+    if action.message:
+        preview = " ".join(action.message[:60].split("\n"))
+        pieces.append(f'msg:"{preview}"')
+    return " ".join(pieces)
+def run_episode(env: NegotiationEnv, task_id: str) -> dict:
+    """Run one full episode. Emits exact [START]/[STEP]/[END] format.
+    Each step sends the accumulated chat history plus a prompt built from the
+    current observation to the LLM, parses the reply into an action and applies
+    it with env.step(). The loop stops when the environment reports done or
+    after MAX_STEPS steps. A failed LLM call does not abort the episode: the
+    empty reply is parsed (which yields the fallback action) and the error text
+    is reported in that step's error= field.
+    Args:
+        env: Environment client exposing reset(task_id=...) and step(action).
+        task_id: Identifier of the task to run.
+    Returns:
+        Dict with "task_id", "score" (reward of the last step, 0.0 when the
+        episode aborted), "success" (final deal_status == "accepted") and
+        "steps" (number of steps taken).
+    """
+    last_error = None
+    step_rewards = []
+    step_num = 0
+    success = False
+    score = 0.0
+    # [START] — exact required format. Printed before anything that can raise
+    # so it appears exactly once per episode, even when reset() or the first
+    # step fails.
+    print(f"[START] task={task_id} env={BENCHMARK} model={MODEL_NAME}", flush=True)
+    # Nemotron 3 Super: disable thinking mode for faster inference.
+    # Loop-invariant, so it is built once per episode.
+    extra_body = (
+        {"chat_template_kwargs": {"enable_thinking": False}}
+        if "nemotron" in MODEL_NAME.lower() else {}
+    )
+    try:
+        result = env.reset(task_id=task_id)
+        messages = [{"role": "system", "content": SYSTEM_PROMPT}]
+        while not result.done and step_num < MAX_STEPS:
+            obs = result.observation
+            messages.append({"role": "user", "content": build_prompt(obs)})
+            # LLM call via OpenAI-compatible client
+            try:
+                resp = client.chat.completions.create(
+                    model=MODEL_NAME,
+                    messages=messages,
+                    max_tokens=256,
+                    temperature=0.3,
+                    extra_body=extra_body,
+                )
+                agent_text = resp.choices[0].message.content or ""
+                last_error = None
+            except Exception as e:
+                agent_text = ""
+                # Collapse whitespace so a multi-line error message cannot
+                # break the single-line [STEP] record.
+                last_error = " ".join(str(e).split())[:80]
+            messages.append({"role": "assistant", "content": agent_text})
+            action = parse_action(agent_text)
+            result = env.step(action)
+            step_num += 1
+            step_reward = result.reward if result.reward is not None else 0.0
+            step_rewards.append(step_reward)
+            # [STEP] — exact required format
+            print(
+                f"[STEP]  step={step_num}"
+                f" action={action_str(action)}"
+                f" reward={step_reward:.2f}"
+                f" done={'true' if result.done else 'false'}"
+                f" error={'null' if last_error is None else last_error}",
+                flush=True,
+            )
+        final_obs = result.observation
+        score = result.reward if result.reward is not None else 0.0
+        success = final_obs.deal_status == "accepted"
+    except Exception as exc:
+        last_error = " ".join(str(exc).split())[:80]
+        score = 0.0
+        success = False
+        # [END] has no error field, so report the abort on stderr instead of
+        # dropping it silently.
+        print(
+            f"run_episode task={task_id} aborted: {last_error}",
+            file=sys.stderr,
+            flush=True,
+        )
+    # [END] — exact required format
+    rewards_str = ",".join(f"{r:.2f}" for r in step_rewards) if step_rewards else "0.00"
+    print(
+        f"[END]   success={'true' if success else 'false'}"
+        f" steps={step_num}"
+        f" score={score:.2f}"
+        f" rewards={rewards_str}",
+        flush=True,
+    )
+    return {"task_id": task_id, "score": score, "success": success, "steps": step_num}
+def main():
+    """Run the baseline on every task, print the average score and save a summary.
+    One environment session (opened against ENV_BASE_URL) is shared by all
+    tasks. The summary — model name, benchmark name, rounded average score and
+    the per-task result dicts — is written to outputs/baseline_scores.json.
+    """
+    tasks = ["task_1", "task_2", "task_3", "task_4"]
+    results = []
+    # Save outputs for reproducibility; create the directory up front so a
+    # permissions problem surfaces before any episode is run.
+    os.makedirs("outputs", exist_ok=True)
+    with NegotiationEnv(base_url=ENV_BASE_URL).sync() as env:
+        for task_id in tasks:
+            results.append(run_episode(env, task_id))
+    avg = sum(r["score"] for r in results) / len(results)
+    summary = {
+        "model": MODEL_NAME,
+        "env": BENCHMARK,
+        "average_score": round(avg, 4),
+        "results": results,
+    }
+    print(f"\nAverage score across {len(tasks)} tasks: {avg:.4f}", flush=True)
+    # Write reproducible output file
+    with open("outputs/baseline_scores.json", "w", encoding="utf-8") as f:
+        json.dump(summary, f, indent=2)
+    print("Scores saved to outputs/baseline_scores.json", flush=True)
+if __name__ == "__main__":
+    main()