amulyalakku committed on
Commit
5925e2b
·
verified ·
1 Parent(s): 9b5b38d

feat: save baseline scores to outputs/baseline_scores.json

Browse files
Files changed (1) hide show
  1. inference.py +256 -0
inference.py ADDED
@@ -0,0 +1,256 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Negotiation Environment β€” Baseline Inference Script
3
+
4
+ MANDATORY stdout format:
5
+ [START] task=<task_name> env=<benchmark> model=<model_name>
6
+ [STEP] step=<n> action=<action_str> reward=<0.00> done=<true|false> error=<msg|null>
7
+ [END] success=<true|false> steps=<n> score=<score> rewards=<r1,r2,...,rn>
8
+
9
+ Required env vars:
10
+ API_BASE_URL β€” OpenAI-compatible endpoint
11
+ Default: https://router.huggingface.co/v1 (HF Inference API)
12
+ MODEL_NAME β€” model identifier
13
+ Default: nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-FP8
14
+ HF_TOKEN β€” Hugging Face API key (get free at huggingface.co/settings/tokens)
15
+ ENV_BASE_URL β€” environment server (default http://localhost:8000)
16
+ LOCAL_IMAGE_NAME β€” docker image name if using from_docker_image()
17
+
18
+ NOTE on API key:
19
+ HF_TOKEN is required for the HF Inference API (free tier available).
20
+ Without it, LLM calls will fail and the agent falls back to 'accept' action.
21
+ Get your free token at: https://huggingface.co/settings/tokens
22
+
23
+ Runtime: < 20 min on vcpu=2, memory=8gb.
24
+ """
25
+
26
+ import json
27
+ import os
28
+ import sys
29
+
30
+ from openai import OpenAI
31
+
32
# ---------------------------------------------------------------------------
# Config — read from environment variables
# ---------------------------------------------------------------------------
# Default to HF Inference API + Nemotron 3 Super (the Phase 2 judge model)
API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
MODEL_NAME = os.getenv("MODEL_NAME", "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-FP8")
HF_TOKEN = os.getenv("HF_TOKEN", "")
ENV_BASE_URL = os.getenv("ENV_BASE_URL", "http://localhost:8000")
LOCAL_IMAGE_NAME = os.getenv("LOCAL_IMAGE_NAME", "")

# Environment identifier reported in [START]; MAX_STEPS is a hard safety cap
# so a non-terminating episode cannot loop forever.
BENCHMARK = "negotiation_env"
MAX_STEPS = 20  # safety cap per episode

# Validate token — warn but don't crash; without a token the LLM calls fail
# at request time and each step falls back to the 'accept' action.
if not HF_TOKEN:
    print(
        "WARNING: HF_TOKEN not set. LLM calls will fail. "
        "Get a free token at https://huggingface.co/settings/tokens",
        flush=True,
    )

# OpenAI-compatible client pointing at HF Inference API.
# The placeholder api_key keeps the client constructible without a token.
client = OpenAI(
    base_url=API_BASE_URL,
    api_key=HF_TOKEN if HF_TOKEN else "hf-no-token",
)

# Make the sibling negotiation_env module importable regardless of cwd.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from negotiation_env import NegotiationEnv, NegotiationAction
61
+
62
# ---------------------------------------------------------------------------
# System prompt — tuned for Nemotron 3 Super (agentic, tool-use optimised).
# NOTE: the original text contained mojibake ("β€”" for "—") from an encoding
# round-trip; fixed here so the model sees clean prompt text.
# ---------------------------------------------------------------------------
SYSTEM_PROMPT = """You are an expert contract negotiator acting as a Buyer.
Your goal: reach the best possible deal within budget and constraints.

RULES:
- Never exceed your budget (the price must_have value).
- Respond ONLY with valid JSON — no prose, no markdown fences.
- JSON must have "action_type". Add "offer", "concession", or "message" as needed.

ACTION TYPES:
  propose — opening offer (requires "offer" dict)
  counter — counter seller's offer (requires "offer" dict)
  concede — give ground on one term (requires "concession": {"term":..., "value":...})
  probe — ask seller a question to infer their floor (requires "message")
  accept — accept seller's current standing offer
  walk_away — only if deal is truly impossible after many rounds

OFFER KEYS (include only what you're proposing):
  price (float), support_years (int), sla_percent (float),
  source_escrow (bool), training_days (int), liability_cap (float),
  termination_notice (int), price_lock_years (int), data_ownership (bool)

STRATEGY:
1. Open at 60-70% of budget. Never anchor at the floor.
2. Hold firm early — do not concede >15% in round 1.
3. Use leverage_score: if >0.6 hold firm, if <0.4 consider conceding.
4. Watch estimated_seller_floor — use it to anchor your counter.
5. Accept when seller price <= budget AND all must-haves satisfied.
6. For task_4: use probe first to infer seller floor before offering.

RESPOND WITH JSON ONLY. Examples:
{"action_type":"propose","offer":{"price":42000,"support_years":2,"source_escrow":true}}
{"action_type":"counter","offer":{"price":48000,"support_years":3,"data_ownership":true}}
{"action_type":"probe","message":"What flexibility do you have on price for a 3-year deal?"}
{"action_type":"accept"}
"""
100
+
101
+
102
def build_prompt(obs) -> str:
    """Render the current observation as the user prompt for the LLM.

    The first line is a compact " | "-separated status header; the remaining
    sections (seller message/offer, floor estimate, buyer constraints, and the
    final instruction) are separated by blank lines.
    """
    header = " | ".join(
        [
            f"Round {obs.round_number}/{obs.max_rounds}",
            f"Remaining: {obs.rounds_remaining}",
            f"Leverage: {obs.leverage_score:.2f}",
            f"Seller urgency: {obs.seller_urgency_signal}",
            f"Seller hint: {obs.seller_personality_hint}",
        ]
    )
    sections = [
        header,
        f"Seller says: {obs.counterparty_response}",
        f"Seller offer: {json.dumps(obs.counterparty_offer)}",
        f"Est. seller floor: {obs.estimated_seller_floor}",
        f"Your constraints: {json.dumps(obs.buyer_constraints)}",
        "Respond with JSON action only.",
    ]
    return "\n\n".join(sections)
115
+
116
+
117
def parse_action(text: str) -> NegotiationAction:
    """Turn raw LLM output into a NegotiationAction.

    Strips markdown code fences before JSON-decoding. Any failure — invalid
    JSON, non-dict payload, construction error — yields a safe 'accept'
    action instead of raising.
    """
    try:
        stripped = text.strip().replace("```json", "").replace("```", "")
        payload = json.loads(stripped.strip())
        return NegotiationAction(
            action_type=payload.get("action_type", "counter"),
            offer=payload.get("offer"),
            concession=payload.get("concession"),
            message=payload.get("message"),
        )
    except Exception:
        # Fallback keeps the episode moving when the model output is unusable.
        return NegotiationAction(action_type="accept")
132
+
133
+
134
def action_str(action: NegotiationAction) -> str:
    """Format an action as one compact, space-separated line for the [STEP] log."""
    compact = (",", ":")  # no-whitespace JSON separators
    pieces = [action.action_type]
    if action.offer:
        pieces.append(json.dumps(action.offer, separators=compact))
    if action.concession:
        pieces.append("concede:" + json.dumps(action.concession, separators=compact))
    if action.message:
        # Truncate and flatten the message so the log line stays single-line.
        snippet = action.message[:60].replace("\n", " ")
        pieces.append(f'msg:"{snippet}"')
    return " ".join(pieces)
145
+
146
+
147
def run_episode(env: NegotiationEnv, task_id: str) -> dict:
    """Run one full episode of *task_id* against *env*.

    Emits the exact mandatory [START]/[STEP]/[END] stdout lines.

    Returns:
        dict with keys "task_id", "score", "success", "steps".
    """
    last_error = None
    step_rewards = []
    step_num = 0
    success = False
    score = 0.0
    started = False  # tracks whether [START] has been emitted

    try:
        result = env.reset(task_id=task_id)

        # [START] — exact required format
        print(f"[START] task={task_id} env={BENCHMARK} model={MODEL_NAME}", flush=True)
        started = True

        messages = [{"role": "system", "content": SYSTEM_PROMPT}]

        while not result.done and step_num < MAX_STEPS:
            obs = result.observation
            messages.append({"role": "user", "content": build_prompt(obs)})

            # LLM call via OpenAI-compatible client. On failure we keep an
            # empty response so parse_action() falls back to 'accept'.
            try:
                resp = client.chat.completions.create(
                    model=MODEL_NAME,
                    messages=messages,
                    max_tokens=256,
                    temperature=0.3,
                    # Nemotron 3 Super: disable thinking mode for faster inference
                    extra_body={"chat_template_kwargs": {"enable_thinking": False}}
                    if "nemotron" in MODEL_NAME.lower() else {},
                )
                agent_text = resp.choices[0].message.content or ""
                last_error = None
            except Exception as e:
                agent_text = ""
                last_error = str(e)[:80]

            messages.append({"role": "assistant", "content": agent_text})
            action = parse_action(agent_text)
            result = env.step(action)
            step_num += 1

            step_reward = result.reward if result.reward is not None else 0.0
            step_rewards.append(step_reward)

            # [STEP] — exact required format
            print(
                f"[STEP] step={step_num}"
                f" action={action_str(action)}"
                f" reward={step_reward:.2f}"
                f" done={'true' if result.done else 'false'}"
                f" error={'null' if last_error is None else last_error}",
                flush=True,
            )

        final_obs = result.observation
        # Final score is the terminal step's reward (0.0 if the env gave none).
        score = result.reward if result.reward is not None else 0.0
        success = final_obs.deal_status == "accepted"

    except Exception as exc:
        last_error = str(exc)[:80]
        score = 0.0
        success = False
        # Emit [START] only if the failure happened before it was printed
        # (e.g. env.reset raised). The original `step_num == 0` check could
        # print [START] twice when the first step itself raised.
        if not started:
            print(f"[START] task={task_id} env={BENCHMARK} model={MODEL_NAME}", flush=True)

    # [END] — exact required format
    rewards_str = ",".join(f"{r:.2f}" for r in step_rewards) if step_rewards else "0.00"
    print(
        f"[END] success={'true' if success else 'false'}"
        f" steps={step_num}"
        f" score={score:.2f}"
        f" rewards={rewards_str}",
        flush=True,
    )

    return {"task_id": task_id, "score": score, "success": success, "steps": step_num}
225
+
226
+
227
def main():
    """Run the baseline over all four tasks and persist the scores.

    Writes a summary (model, env, average score, per-task results) to
    outputs/baseline_scores.json for reproducibility.
    """
    tasks = ["task_1", "task_2", "task_3", "task_4"]
    results = []

    # Ensure the output directory exists before any episode runs.
    os.makedirs("outputs", exist_ok=True)

    with NegotiationEnv(base_url=ENV_BASE_URL).sync() as env:
        for task_id in tasks:
            results.append(run_episode(env, task_id))

    avg = sum(r["score"] for r in results) / len(results)
    summary = {
        "model": MODEL_NAME,
        "env": BENCHMARK,
        "average_score": round(avg, 4),
        "results": results,
    }
    print(f"\nAverage score across {len(tasks)} tasks: {avg:.4f}", flush=True)

    # Write reproducible output file. Uses the module-level `json` import;
    # the previous local `import json as _json` was redundant.
    with open("outputs/baseline_scores.json", "w") as f:
        json.dump(summary, f, indent=2)
    print("Scores saved to outputs/baseline_scores.json", flush=True)
253
+
254
+
255
+ if __name__ == "__main__":
256
+ main()