ykjung and Claude Sonnet 4.6 committed on
Commit 887eaf6 · 1 Parent(s): 8acad99

feat(rl): add backtest performance metrics + AlphaGo-style action probabilities


- _backtest_sync: simulates episodes after training → computes win rate / average trade return / Sharpe ratio
- train_rl: stores the backtest metrics in the accuracy (win rate) and auc (Sharpe) columns
- predict_rl: extracts P(BUY)/P(HOLD)/P(SELL) from the Policy Network softmax
- router: includes the backtest results in the WS complete message (a client-side sketch follows below)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
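
Before the diffs, a minimal client-side sketch of how the new frames might be consumed over the training WebSocket. The URL and route are assumptions for illustration; only the frame fields (type, progress, message, result) and the result keys (winRate, avgTradeReturn, sharpeRatio) come from this commit.

import asyncio
import json

import websockets  # assumed client library; any WebSocket client works


async def watch_training(url: str = "ws://localhost:8000/ws/rl/train") -> None:
    # The URL is hypothetical; match it to wherever routers/rl.py is mounted.
    async with websockets.connect(url) as ws:
        async for raw in ws:
            msg = json.loads(raw)
            if msg["type"] == "training":
                # The final 100% frame now carries the backtest summary message.
                print(f"{msg['progress']}% {msg.get('message', '')}")
            elif msg["type"] == "complete":
                r = msg["result"]
                print(f"win rate {r['winRate']}%, Sharpe {r['sharpeRatio']}")
                break


asyncio.run(watch_training())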

Files changed (2)
  1. routers/rl.py +4 -1
  2. services/rl_service.py +122 -4
routers/rl.py CHANGED
@@ -173,7 +173,10 @@ async def websocket_rl_train(websocket: WebSocket):
         )
 
         _set_job(status="complete", train_progress=100, result=result)
-        await _send({"type": "training", "progress": 100})
+        await _send({"type": "training", "progress": 100,
+                     "message": f"Backtest complete - win rate {result.get('winRate', 0):.1f}%, "
+                                f"avg return {result.get('avgTradeReturn', 0):.2f}%, "
+                                f"Sharpe {result.get('sharpeRatio', 0):.2f}"})
         await _send({"type": "complete", "result": result})
 
         logger.info(f"[WS:RLTrain] Training complete: {result}")
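
With these lines applied, the last progress frame sent before "complete" would look roughly like this (numbers are illustrative; the keys match the added code above):

final_frame = {
    "type": "training",
    "progress": 100,
    "message": "Backtest complete - win rate 54.3%, avg return 0.82%, Sharpe 0.31",
}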
services/rl_service.py CHANGED
@@ -201,6 +201,88 @@ def _deserialize_model(model_b64: str):
         pass
 
 
+# ─────────────────────────────────────────────────────────
+# Backtest (synchronous; runs in an executor)
+# ─────────────────────────────────────────────────────────
+
+def _backtest_sync(model, episodes: list[dict], max_episodes: int = 100) -> dict:
+    """
+    Simulates episodes with the trained model and computes performance metrics.
+
+    Returns:
+        win_rate      : proportion of profitable trades (0~100%) → stored in the accuracy column
+        avg_trade_ret : average return per trade, in % → stored in the f1 column
+        sharpe_ratio  : Sharpe ratio → stored in the auc column
+        total_trades  : total number of trades
+        total_return  : overall cumulative return, in %
+    """
+    sample = episodes[:max_episodes]
+    trade_returns: list[float] = []
+    episode_returns: list[float] = []
+
+    for ep in sample:
+        holding = False
+        buy_price = 0.0
+        holding_days = 0
+
+        for i in range(len(ep["features"])):
+            feat = ep["features"][i]
+            holding_flag = 1.0 if holding else 0.0
+            holding_return = (float(ep["prices"][i]) - buy_price) / buy_price if holding and buy_price > 0 else 0.0
+            obs = np.append(feat, [holding_flag, holding_return, float(holding_days)]).astype(np.float32)
+
+            action, _ = model.predict(obs, deterministic=True)
+            action = int(action)
+
+            if action == 1 and not holding:
+                holding = True
+                buy_price = float(ep["prices"][i])
+                holding_days = 0
+            elif action == 2 and holding:
+                ret = (float(ep["prices"][i]) - buy_price) / buy_price
+                trade_returns.append(ret)
+                holding = False
+                buy_price = 0.0
+                holding_days = 0
+
+            if holding:
+                holding_days += 1
+
+        # Force-liquidate any position still open at the end of the episode
+        if holding and buy_price > 0:
+            ret = (float(ep["prices"][-1]) - buy_price) / buy_price
+            trade_returns.append(ret)
+
+        if len(ep["prices"]) > 1:
+            ep_ret = (float(ep["prices"][-1]) - float(ep["prices"][0])) / float(ep["prices"][0])
+            episode_returns.append(ep_ret)
+
+    n = len(trade_returns)
+    if n == 0:
+        return {"win_rate": 0.0, "avg_trade_ret": 0.0, "sharpe_ratio": 0.0,
+                "total_trades": 0, "total_return": 0.0}
+
+    wins = sum(1 for r in trade_returns if r > 0)
+    win_rate = round(wins / n * 100, 2)
+    avg_ret = float(np.mean(trade_returns)) * 100
+    total_return = float(np.sum(episode_returns)) / len(episode_returns) * 100 if episode_returns else 0.0
+
+    # Sharpe ratio (risk-free rate assumed to be 0)
+    ret_std = float(np.std(trade_returns))
+    sharpe = round(float(np.mean(trade_returns)) / ret_std, 3) if ret_std > 0 else 0.0
+
+    logger.info(
+        f"[RL:Backtest] trades={n}, win_rate={win_rate}%, avg_return={avg_ret:.2f}%, sharpe={sharpe}"
+    )
+    return {
+        "win_rate": win_rate,
+        "avg_trade_ret": round(avg_ret, 2),
+        "sharpe_ratio": sharpe,
+        "total_trades": n,
+        "total_return": round(total_return, 2),
+    }
+
+
 # ─────────────────────────────────────────────────────────
 # Public API
 # ─────────────────────────────────────────────────────────
@@ -226,18 +308,24 @@ async def train_rl(
         None, _train_ppo_sync, episodes, total_timesteps, _sync_progress
     )
 
-    model_b64 = await loop.run_in_executor(None, _serialize_model, model)
+    # Run serialization and the backtest in parallel
+    model_b64, bt = await asyncio.gather(
+        loop.run_in_executor(None, _serialize_model, model),
+        loop.run_in_executor(None, _backtest_sync, model, episodes),
+    )
 
     n_features = int(episodes[0]["features"].shape[1]) + 3  # +3 portfolio-state features
     total_steps = sum(len(ep["features"]) for ep in episodes)
 
     model_data = {
         "name": model_name,
-        "accuracy": 0.0,
-        "f1": 0.0,
+        # Store win rate in the XGBoost accuracy slot and Sharpe ratio in the auc slot
+        # → the existing model-list UI can keep using them as performance metrics
+        "accuracy": bt["win_rate"] / 100,  # stored as 0~1 (the UI renders %)
+        "f1": max(0.0, bt["avg_trade_ret"] / 100),
         "precision": 0.0,
         "recall": 0.0,
-        "auc": 0.0,
+        "auc": max(0.0, bt["sharpe_ratio"]),
         "feature_count": n_features,
         "sample_count": total_steps,
         "stage": stage,  # feature stage used for training (prediction requires the same stage)
@@ -247,6 +335,12 @@ async def train_rl(
             "stage": stage,
             "n_episodes": len(episodes),
             "total_timesteps": total_timesteps,
+            # Backtest metrics (raw values preserved in model_json as well)
+            "win_rate": bt["win_rate"],
+            "avg_trade_ret": bt["avg_trade_ret"],
+            "sharpe_ratio": bt["sharpe_ratio"],
+            "total_trades": bt["total_trades"],
+            "total_return": bt["total_return"],
             "model_b64": model_b64,
         },
     }
@@ -261,6 +355,12 @@ async def train_rl(
         "totalTimesteps": total_timesteps,
         "featureCount": n_features,
         "sampleCount": total_steps,
+        # Backtest performance metrics
+        "winRate": bt["win_rate"],
+        "avgTradeReturn": bt["avg_trade_ret"],
+        "sharpeRatio": bt["sharpe_ratio"],
+        "totalTrades": bt["total_trades"],
+        "totalReturn": bt["total_return"],
     }
 
 
@@ -323,6 +423,20 @@ async def predict_rl(
         action, _ = model.predict(obs, deterministic=True)
         action = int(action)
 
+        # Extract AlphaGo-style per-action probabilities (Policy Network softmax output)
+        try:
+            import torch
+            obs_t = obs.reshape(1, -1)
+            obs_tensor, _ = model.policy.obs_to_tensor(obs_t)
+            with torch.no_grad():
+                dist = model.policy.get_distribution(obs_tensor)
+                probs = dist.distribution.probs.squeeze().cpu().numpy()
+            prob_hold = round(float(probs[0]) * 100, 1)
+            prob_buy = round(float(probs[1]) * 100, 1)
+            prob_sell = round(float(probs[2]) * 100, 1)
+        except Exception:
+            prob_hold = prob_buy = prob_sell = None
+
         if action == 1 and not holding:
             holding = True
             buy_price = float(prices_arr[i])
@@ -342,6 +456,10 @@ async def predict_rl(
             "price": float(prices_arr[i]),
             "holding": holding,
             "holding_return": round(holding_return * 100, 2),
+            # AlphaGo-style probability of each action (Policy Network output)
+            "prob_hold": prob_hold,
+            "prob_buy": prob_buy,
+            "prob_sell": prob_sell,
         }
         if i < len(raw_features):
            entry.update(raw_features[i])
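
Two closing notes on services/rl_service.py. First, the Sharpe ratio in _backtest_sync is simply mean(trade_returns) / std(trade_returns) with a zero risk-free rate and no annualization, so it is comparable across models trained here but not directly against annualized Sharpe figures. Second, below is a self-contained sketch of the probability-extraction pattern used in predict_rl, for readers who want to try it outside the service. CartPole-v1 is only a stand-in environment so the snippet runs on its own; the real model's action space is Discrete(3) (HOLD/BUY/SELL).

import numpy as np
import torch
from stable_baselines3 import PPO

# Stand-in model; the service instead deserializes its trained PPO model.
model = PPO("MlpPolicy", "CartPole-v1")
obs = np.zeros(model.observation_space.shape, dtype=np.float32)

# Same calls as in the diff above: tensorize the observation, then read the
# categorical distribution produced by the policy head's softmax.
obs_tensor, _ = model.policy.obs_to_tensor(obs.reshape(1, -1))
with torch.no_grad():
    dist = model.policy.get_distribution(obs_tensor)
    probs = dist.distribution.probs.squeeze().cpu().numpy()

print({f"P(action={i})": round(float(p) * 100, 1) for i, p in enumerate(probs)})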