ykjung Claude Sonnet 4.6 committed on
Commit ·
887eaf6
1
Parent(s): 8acad99
feat(rl): 백테스트 성능 지표 + 알파고식 액션 확률 추가
Browse files- _backtest_sync: 학습 후 에피소드 시뮬레이션 → 승률/평균수익/샤프비율 계산
- train_rl: 백테스트 지표를 accuracy(승률), auc(샤프) 컬럼에 저장
- predict_rl: Policy Network softmax로 P(BUY)/P(HOLD)/P(SELL) 추출
- router: 백테스트 결과를 WS complete 메시지에 포함
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- routers/rl.py +4 -1
- services/rl_service.py +122 -4
routers/rl.py
CHANGED
|
@@ -173,7 +173,10 @@ async def websocket_rl_train(websocket: WebSocket):
|
|
| 173 |
)
|
| 174 |
|
| 175 |
_set_job(status="complete", train_progress=100, result=result)
|
| 176 |
-
await _send({"type": "training", "progress": 100
|
|
|
|
|
|
|
|
|
|
| 177 |
await _send({"type": "complete", "result": result})
|
| 178 |
|
| 179 |
logger.info(f"[WS:RLTrain] 학습 완료: {result}")
|
|
|
|
| 173 |
)
|
| 174 |
|
| 175 |
_set_job(status="complete", train_progress=100, result=result)
|
| 176 |
+
await _send({"type": "training", "progress": 100,
|
| 177 |
+
"message": f"백테스트 완료 — 승률 {result.get('winRate', 0):.1f}%, "
|
| 178 |
+
f"평균수익 {result.get('avgTradeReturn', 0):.2f}%, "
|
| 179 |
+
f"샤프 {result.get('sharpeRatio', 0):.2f}"})
|
| 180 |
await _send({"type": "complete", "result": result})
|
| 181 |
|
| 182 |
logger.info(f"[WS:RLTrain] 학습 완료: {result}")
|
services/rl_service.py
CHANGED
|
@@ -201,6 +201,88 @@ def _deserialize_model(model_b64: str):
|
|
| 201 |
pass
|
| 202 |
|
| 203 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
# ─────────────────────────────────────────────────────────
|
| 205 |
# 공개 API
|
| 206 |
# ─────────────────────────────────────────────────────────
|
|
@@ -226,18 +308,24 @@ async def train_rl(
|
|
| 226 |
None, _train_ppo_sync, episodes, total_timesteps, _sync_progress
|
| 227 |
)
|
| 228 |
|
| 229 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
|
| 231 |
n_features = int(episodes[0]["features"].shape[1]) + 3 # +3 포트폴리오 상태
|
| 232 |
total_steps = sum(len(ep["features"]) for ep in episodes)
|
| 233 |
|
| 234 |
model_data = {
|
| 235 |
"name": model_name,
|
| 236 |
-
|
| 237 |
-
|
|
|
|
|
|
|
| 238 |
"precision": 0.0,
|
| 239 |
"recall": 0.0,
|
| 240 |
-
"auc": 0.0,
|
| 241 |
"feature_count": n_features,
|
| 242 |
"sample_count": total_steps,
|
| 243 |
"stage": stage, # 학습에 사용된 피처 stage (예측 시 동일 stage 필요)
|
|
@@ -247,6 +335,12 @@ async def train_rl(
|
|
| 247 |
"stage": stage,
|
| 248 |
"n_episodes": len(episodes),
|
| 249 |
"total_timesteps": total_timesteps,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 250 |
"model_b64": model_b64,
|
| 251 |
},
|
| 252 |
}
|
|
@@ -261,6 +355,12 @@ async def train_rl(
|
|
| 261 |
"totalTimesteps": total_timesteps,
|
| 262 |
"featureCount": n_features,
|
| 263 |
"sampleCount": total_steps,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 264 |
}
|
| 265 |
|
| 266 |
|
|
@@ -323,6 +423,20 @@ async def predict_rl(
|
|
| 323 |
action, _ = model.predict(obs, deterministic=True)
|
| 324 |
action = int(action)
|
| 325 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 326 |
if action == 1 and not holding:
|
| 327 |
holding = True
|
| 328 |
buy_price = float(prices_arr[i])
|
|
@@ -342,6 +456,10 @@ async def predict_rl(
|
|
| 342 |
"price": float(prices_arr[i]),
|
| 343 |
"holding": holding,
|
| 344 |
"holding_return": round(holding_return * 100, 2),
|
|
|
|
|
|
|
|
|
|
|
|
|
| 345 |
}
|
| 346 |
if i < len(raw_features):
|
| 347 |
entry.update(raw_features[i])
|
|
|
|
| 201 |
pass
|
| 202 |
|
| 203 |
|
| 204 |
+
# ─────────────────────────────────────────────────────────
|
| 205 |
+
# 백테스트 (동기 — executor에서 실행)
|
| 206 |
+
# ─────────────────────────────────────────────────────────
|
| 207 |
+
|
| 208 |
+
def _backtest_sync(model, episodes: list[dict], max_episodes: int = 100) -> dict:
    """
    Simulate episodes with the trained model and compute performance metrics.

    Runs synchronously — the caller is expected to dispatch it to an executor.

    Args:
        model: trained policy exposing ``predict(obs, deterministic=...)``
            (e.g. an SB3 PPO model) returning an action in
            {0: HOLD, 1: BUY, 2: SELL}.
        episodes: list of dicts with ``"features"`` (2-D per-step feature array)
            and ``"prices"`` (per-step price series), assumed aligned in length.
        max_episodes: cap on the number of episodes simulated, to bound runtime.

    Returns:
        dict with:
            win_rate      : % of closed trades with positive return → stored in
                            the ``accuracy`` column
            avg_trade_ret : mean return per trade, % → stored in ``f1``
            sharpe_ratio  : mean/std of per-trade returns (risk-free rate 0)
                            → stored in ``auc``
            total_trades  : number of closed trades (incl. forced liquidation)
            total_return  : average buy-and-hold return per episode, % —
                            a market benchmark, NOT the strategy's cumulative
                            return
    """
    sample = episodes[:max_episodes]
    trade_returns: list[float] = []
    episode_returns: list[float] = []

    for ep in sample:
        holding = False
        buy_price = 0.0
        holding_days = 0

        for i in range(len(ep["features"])):
            feat = ep["features"][i]
            holding_flag = 1.0 if holding else 0.0
            holding_return = (
                (float(ep["prices"][i]) - buy_price) / buy_price
                if holding and buy_price > 0
                else 0.0
            )
            # Observation = market features + 3 portfolio-state features,
            # mirroring the training environment's observation layout
            # (the train path adds "+3" to feature_count for the same reason).
            obs = np.append(
                feat, [holding_flag, holding_return, float(holding_days)]
            ).astype(np.float32)

            action, _ = model.predict(obs, deterministic=True)
            action = int(action)

            if action == 1 and not holding:  # BUY → open position
                holding = True
                buy_price = float(ep["prices"][i])
                holding_days = 0
            elif action == 2 and holding:  # SELL → close and record the trade
                trade_returns.append(
                    (float(ep["prices"][i]) - buy_price) / buy_price
                )
                holding = False
                buy_price = 0.0
                holding_days = 0

            if holding:
                holding_days += 1

        # Force-liquidate any position still open at the final price.
        if holding and buy_price > 0:
            trade_returns.append(
                (float(ep["prices"][-1]) - buy_price) / buy_price
            )

        if len(ep["prices"]) > 1:
            first = float(ep["prices"][0])
            last = float(ep["prices"][-1])
            episode_returns.append((last - first) / first)

    n = len(trade_returns)
    if n == 0:
        # The model never traded — every metric degenerates to zero.
        return {"win_rate": 0.0, "avg_trade_ret": 0.0, "sharpe_ratio": 0.0,
                "total_trades": 0, "total_return": 0.0}

    wins = sum(1 for r in trade_returns if r > 0)
    win_rate = round(wins / n * 100, 2)
    avg_ret = float(np.mean(trade_returns)) * 100
    # Mean buy-and-hold return across episodes — a benchmark, not strategy P&L.
    total_return = (
        float(np.mean(episode_returns)) * 100 if episode_returns else 0.0
    )

    # Per-trade Sharpe ratio with a 0 risk-free rate assumption.
    ret_std = float(np.std(trade_returns))
    sharpe = (
        round(float(np.mean(trade_returns)) / ret_std, 3) if ret_std > 0 else 0.0
    )

    logger.info(
        f"[RL:Backtest] 거래={n}, 승률={win_rate}%, 평균수익={avg_ret:.2f}%, 샤프={sharpe}"
    )
    return {
        "win_rate": win_rate,
        "avg_trade_ret": round(avg_ret, 2),
        "sharpe_ratio": sharpe,
        "total_trades": n,
        "total_return": round(total_return, 2),
    }
|
| 284 |
+
|
| 285 |
+
|
| 286 |
# ─────────────────────────────────────────────────────────
|
| 287 |
# 공개 API
|
| 288 |
# ─────────────────────────────────────────────────────────
|
|
|
|
| 308 |
None, _train_ppo_sync, episodes, total_timesteps, _sync_progress
|
| 309 |
)
|
| 310 |
|
| 311 |
+
# 직렬화 + 백테스트 병렬 실행
|
| 312 |
+
model_b64, bt = await asyncio.gather(
|
| 313 |
+
loop.run_in_executor(None, _serialize_model, model),
|
| 314 |
+
loop.run_in_executor(None, _backtest_sync, model, episodes),
|
| 315 |
+
)
|
| 316 |
|
| 317 |
n_features = int(episodes[0]["features"].shape[1]) + 3 # +3 포트폴리오 상태
|
| 318 |
total_steps = sum(len(ep["features"]) for ep in episodes)
|
| 319 |
|
| 320 |
model_data = {
|
| 321 |
"name": model_name,
|
| 322 |
+
# XGBoost accuracy 자리에 승률, auc 자리에 샤프비율 저장
|
| 323 |
+
# → 기존 모델 목록 UI에서 그대로 성능 지표로 활용 가능
|
| 324 |
+
"accuracy": bt["win_rate"] / 100, # 0~1 범위로 저장 (UI가 % 표시)
|
| 325 |
+
"f1": max(0.0, bt["avg_trade_ret"] / 100),
|
| 326 |
"precision": 0.0,
|
| 327 |
"recall": 0.0,
|
| 328 |
+
"auc": max(0.0, bt["sharpe_ratio"]),
|
| 329 |
"feature_count": n_features,
|
| 330 |
"sample_count": total_steps,
|
| 331 |
"stage": stage, # 학습에 사용된 피처 stage (예측 시 동일 stage 필요)
|
|
|
|
| 335 |
"stage": stage,
|
| 336 |
"n_episodes": len(episodes),
|
| 337 |
"total_timesteps": total_timesteps,
|
| 338 |
+
# 백테스트 지표 (model_json에도 원본 수치 보존)
|
| 339 |
+
"win_rate": bt["win_rate"],
|
| 340 |
+
"avg_trade_ret": bt["avg_trade_ret"],
|
| 341 |
+
"sharpe_ratio": bt["sharpe_ratio"],
|
| 342 |
+
"total_trades": bt["total_trades"],
|
| 343 |
+
"total_return": bt["total_return"],
|
| 344 |
"model_b64": model_b64,
|
| 345 |
},
|
| 346 |
}
|
|
|
|
| 355 |
"totalTimesteps": total_timesteps,
|
| 356 |
"featureCount": n_features,
|
| 357 |
"sampleCount": total_steps,
|
| 358 |
+
# 백테스트 성능 지표
|
| 359 |
+
"winRate": bt["win_rate"],
|
| 360 |
+
"avgTradeReturn": bt["avg_trade_ret"],
|
| 361 |
+
"sharpeRatio": bt["sharpe_ratio"],
|
| 362 |
+
"totalTrades": bt["total_trades"],
|
| 363 |
+
"totalReturn": bt["total_return"],
|
| 364 |
}
|
| 365 |
|
| 366 |
|
|
|
|
| 423 |
action, _ = model.predict(obs, deterministic=True)
|
| 424 |
action = int(action)
|
| 425 |
|
| 426 |
+
# 알파고처럼 각 액션 확률 추출 (Policy Network softmax 출력)
|
| 427 |
+
try:
|
| 428 |
+
import torch
|
| 429 |
+
obs_t = obs.reshape(1, -1)
|
| 430 |
+
obs_tensor, _ = model.policy.obs_to_tensor(obs_t)
|
| 431 |
+
with torch.no_grad():
|
| 432 |
+
dist = model.policy.get_distribution(obs_tensor)
|
| 433 |
+
probs = dist.distribution.probs.squeeze().cpu().numpy()
|
| 434 |
+
prob_hold = round(float(probs[0]) * 100, 1)
|
| 435 |
+
prob_buy = round(float(probs[1]) * 100, 1)
|
| 436 |
+
prob_sell = round(float(probs[2]) * 100, 1)
|
| 437 |
+
except Exception:
|
| 438 |
+
prob_hold = prob_buy = prob_sell = None
|
| 439 |
+
|
| 440 |
if action == 1 and not holding:
|
| 441 |
holding = True
|
| 442 |
buy_price = float(prices_arr[i])
|
|
|
|
| 456 |
"price": float(prices_arr[i]),
|
| 457 |
"holding": holding,
|
| 458 |
"holding_return": round(holding_return * 100, 2),
|
| 459 |
+
# 알파고처럼 각 액션의 확률 (Policy Network 출력)
|
| 460 |
+
"prob_hold": prob_hold,
|
| 461 |
+
"prob_buy": prob_buy,
|
| 462 |
+
"prob_sell": prob_sell,
|
| 463 |
}
|
| 464 |
if i < len(raw_features):
|
| 465 |
entry.update(raw_features[i])
|