Spaces:
Running on CPU Upgrade
Add sandbox costs to usage meter (#311)
Browse files
* Add sandbox costs to usage meter
Co-authored-by: OpenAI Codex <codex@openai.com>
* Remove usage rollup buckets
Co-authored-by: OpenAI Codex <codex@openai.com>
* Remove obsolete usage rollup assertions
Co-authored-by: OpenAI Codex <codex@openai.com>
* Remove unused usage window aggregation
Co-authored-by: OpenAI Codex <codex@openai.com>
* Update usage event sort test
Co-authored-by: OpenAI Codex <codex@openai.com>
---------
Co-authored-by: OpenAI Codex <codex@openai.com>
- agent/core/session_persistence.py +7 -2
- backend/models.py +4 -7
- backend/routes/agent.py +0 -2
- backend/session_manager.py +0 -1
- backend/usage.py +114 -65
- frontend/src/components/UsageMeter.tsx +9 -1
- frontend/src/hooks/useAgentChat.ts +1 -1
- frontend/src/lib/sse-chat-transport.ts +2 -1
- frontend/src/store/usageStore.ts +14 -11
- frontend/src/types/events.ts +2 -0
- tests/unit/test_session_persistence.py +1 -1
- tests/unit/test_usage.py +106 -22
agent/core/session_persistence.py
CHANGED
|
@@ -20,7 +20,12 @@ logger = logging.getLogger(__name__)
|
|
| 20 |
|
| 21 |
SCHEMA_VERSION = 1
|
| 22 |
MAX_BSON_BYTES = 15 * 1024 * 1024
|
| 23 |
-
USAGE_EVENT_TYPES = (
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
|
| 26 |
def _now() -> datetime:
|
|
@@ -419,7 +424,7 @@ class MongoSessionStore(NoopSessionStore):
|
|
| 419 |
created_at["$lt"] = end
|
| 420 |
event_query["created_at"] = created_at
|
| 421 |
|
| 422 |
-
event_cursor = self.db.session_events.find(event_query)
|
| 423 |
return [row async for row in event_cursor]
|
| 424 |
|
| 425 |
async def append_trace_message(
|
|
|
|
| 20 |
|
| 21 |
SCHEMA_VERSION = 1
|
| 22 |
MAX_BSON_BYTES = 15 * 1024 * 1024
|
| 23 |
+
USAGE_EVENT_TYPES = (
|
| 24 |
+
"llm_call",
|
| 25 |
+
"hf_job_complete",
|
| 26 |
+
"sandbox_create",
|
| 27 |
+
"sandbox_destroy",
|
| 28 |
+
)
|
| 29 |
|
| 30 |
|
| 31 |
def _now() -> datetime:
|
|
|
|
| 424 |
created_at["$lt"] = end
|
| 425 |
event_query["created_at"] = created_at
|
| 426 |
|
| 427 |
+
event_cursor = self.db.session_events.find(event_query).sort("created_at", 1)
|
| 428 |
return [row async for row in event_cursor]
|
| 429 |
|
| 430 |
async def append_trace_message(
|
backend/models.py
CHANGED
|
@@ -122,23 +122,23 @@ class SessionYoloRequest(BaseModel):
|
|
| 122 |
|
| 123 |
|
| 124 |
class UsageBucket(BaseModel):
|
| 125 |
-
"""App-attributed usage totals for a session
|
| 126 |
|
| 127 |
session_id: str | None = None
|
| 128 |
-
window_start: str | None = None
|
| 129 |
-
window_end: str | None = None
|
| 130 |
-
timezone: str | None = None
|
| 131 |
total_usd: float = 0.0
|
| 132 |
inference_usd: float = 0.0
|
| 133 |
hf_jobs_estimated_usd: float = 0.0
|
|
|
|
| 134 |
llm_calls: int = 0
|
| 135 |
hf_jobs_count: int = 0
|
|
|
|
| 136 |
prompt_tokens: int = 0
|
| 137 |
completion_tokens: int = 0
|
| 138 |
cache_read_tokens: int = 0
|
| 139 |
cache_creation_tokens: int = 0
|
| 140 |
total_tokens: int = 0
|
| 141 |
hf_jobs_billable_seconds_estimate: int = 0
|
|
|
|
| 142 |
|
| 143 |
|
| 144 |
class HfAccountUsageBucket(BaseModel):
|
|
@@ -174,7 +174,6 @@ class HfAccountUsage(BaseModel):
|
|
| 174 |
available: bool = False
|
| 175 |
error: str | None = None
|
| 176 |
current_session: HfAccountUsageBucket | None = None
|
| 177 |
-
today: HfAccountUsageBucket | None = None
|
| 178 |
month: HfAccountUsageBucket | None = None
|
| 179 |
inference_providers_credits: HfInferenceProvidersCredits | None = None
|
| 180 |
|
|
@@ -187,8 +186,6 @@ class UsageResponse(BaseModel):
|
|
| 187 |
generated_at: str
|
| 188 |
timezone: str
|
| 189 |
session: UsageBucket | None = None
|
| 190 |
-
today: UsageBucket
|
| 191 |
-
month: UsageBucket
|
| 192 |
hf_account: HfAccountUsage | None = None
|
| 193 |
links: dict[str, str] = Field(default_factory=dict)
|
| 194 |
|
|
|
|
| 122 |
|
| 123 |
|
| 124 |
class UsageBucket(BaseModel):
|
| 125 |
+
"""App-attributed usage totals for a session."""
|
| 126 |
|
| 127 |
session_id: str | None = None
|
|
|
|
|
|
|
|
|
|
| 128 |
total_usd: float = 0.0
|
| 129 |
inference_usd: float = 0.0
|
| 130 |
hf_jobs_estimated_usd: float = 0.0
|
| 131 |
+
sandbox_estimated_usd: float = 0.0
|
| 132 |
llm_calls: int = 0
|
| 133 |
hf_jobs_count: int = 0
|
| 134 |
+
sandbox_count: int = 0
|
| 135 |
prompt_tokens: int = 0
|
| 136 |
completion_tokens: int = 0
|
| 137 |
cache_read_tokens: int = 0
|
| 138 |
cache_creation_tokens: int = 0
|
| 139 |
total_tokens: int = 0
|
| 140 |
hf_jobs_billable_seconds_estimate: int = 0
|
| 141 |
+
sandbox_billable_seconds_estimate: int = 0
|
| 142 |
|
| 143 |
|
| 144 |
class HfAccountUsageBucket(BaseModel):
|
|
|
|
| 174 |
available: bool = False
|
| 175 |
error: str | None = None
|
| 176 |
current_session: HfAccountUsageBucket | None = None
|
|
|
|
| 177 |
month: HfAccountUsageBucket | None = None
|
| 178 |
inference_providers_credits: HfInferenceProvidersCredits | None = None
|
| 179 |
|
|
|
|
| 186 |
generated_at: str
|
| 187 |
timezone: str
|
| 188 |
session: UsageBucket | None = None
|
|
|
|
|
|
|
| 189 |
hf_account: HfAccountUsage | None = None
|
| 190 |
links: dict[str, str] = Field(default_factory=dict)
|
| 191 |
|
backend/routes/agent.py
CHANGED
|
@@ -718,7 +718,6 @@ async def get_usage(
|
|
| 718 |
request: Request,
|
| 719 |
session_id: str | None = None,
|
| 720 |
tz: str | None = None,
|
| 721 |
-
include_rollups: bool = True,
|
| 722 |
user: dict = Depends(get_current_user),
|
| 723 |
) -> dict:
|
| 724 |
"""Return app-attributed usage for the current user."""
|
|
@@ -739,7 +738,6 @@ async def get_usage(
|
|
| 739 |
),
|
| 740 |
session_id=session_id,
|
| 741 |
timezone_name=tz,
|
| 742 |
-
include_rollups=include_rollups,
|
| 743 |
)
|
| 744 |
|
| 745 |
|
|
|
|
| 718 |
request: Request,
|
| 719 |
session_id: str | None = None,
|
| 720 |
tz: str | None = None,
|
|
|
|
| 721 |
user: dict = Depends(get_current_user),
|
| 722 |
) -> dict:
|
| 723 |
"""Return app-attributed usage for the current user."""
|
|
|
|
| 738 |
),
|
| 739 |
session_id=session_id,
|
| 740 |
timezone_name=tz,
|
|
|
|
| 741 |
)
|
| 742 |
|
| 743 |
|
backend/session_manager.py
CHANGED
|
@@ -481,7 +481,6 @@ class SessionManager:
|
|
| 481 |
hf_token=agent_session.hf_token,
|
| 482 |
session_id=agent_session.session_id,
|
| 483 |
timezone_name="UTC",
|
| 484 |
-
include_rollups=False,
|
| 485 |
)
|
| 486 |
spend, billing_source = self._usage_spend_from_response(response)
|
| 487 |
agent_session.usage_warning_spend_cache = {
|
|
|
|
| 481 |
hf_token=agent_session.hf_token,
|
| 482 |
session_id=agent_session.session_id,
|
| 483 |
timezone_name="UTC",
|
|
|
|
| 484 |
)
|
| 485 |
spend, billing_source = self._usage_spend_from_response(response)
|
| 486 |
agent_session.usage_warning_spend_cache = {
|
backend/usage.py
CHANGED
|
@@ -2,13 +2,20 @@
|
|
| 2 |
|
| 3 |
import asyncio
|
| 4 |
import logging
|
| 5 |
-
from datetime import UTC, datetime
|
| 6 |
from typing import Any
|
| 7 |
from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
|
| 8 |
|
| 9 |
import httpx
|
| 10 |
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
logger = logging.getLogger(__name__)
|
| 14 |
|
|
@@ -112,7 +119,7 @@ def resolve_usage_windows(
|
|
| 112 |
*,
|
| 113 |
now: datetime | None = None,
|
| 114 |
) -> dict[str, datetime | str]:
|
| 115 |
-
"""Return UTC
|
| 116 |
try:
|
| 117 |
tz = ZoneInfo(timezone_name or "UTC")
|
| 118 |
except (ZoneInfoNotFoundError, ValueError):
|
|
@@ -120,12 +127,10 @@ def resolve_usage_windows(
|
|
| 120 |
|
| 121 |
now_utc = _utc(now or datetime.now(UTC))
|
| 122 |
local_now = now_utc.astimezone(tz)
|
| 123 |
-
|
| 124 |
-
month_local = today_local.replace(day=1)
|
| 125 |
return {
|
| 126 |
"timezone": tz.key,
|
| 127 |
"now_utc": now_utc,
|
| 128 |
-
"today_start_utc": today_local.astimezone(UTC),
|
| 129 |
"month_start_utc": month_local.astimezone(UTC),
|
| 130 |
}
|
| 131 |
|
|
@@ -133,26 +138,23 @@ def resolve_usage_windows(
|
|
| 133 |
def _empty_bucket(
|
| 134 |
*,
|
| 135 |
session_id: str | None = None,
|
| 136 |
-
window_start: datetime | None = None,
|
| 137 |
-
window_end: datetime | None = None,
|
| 138 |
-
timezone: str | None = None,
|
| 139 |
) -> dict[str, Any]:
|
| 140 |
return {
|
| 141 |
"session_id": session_id,
|
| 142 |
-
"window_start": _iso(window_start),
|
| 143 |
-
"window_end": _iso(window_end),
|
| 144 |
-
"timezone": timezone,
|
| 145 |
"total_usd": 0.0,
|
| 146 |
"inference_usd": 0.0,
|
| 147 |
"hf_jobs_estimated_usd": 0.0,
|
|
|
|
| 148 |
"llm_calls": 0,
|
| 149 |
"hf_jobs_count": 0,
|
|
|
|
| 150 |
"prompt_tokens": 0,
|
| 151 |
"completion_tokens": 0,
|
| 152 |
"cache_read_tokens": 0,
|
| 153 |
"cache_creation_tokens": 0,
|
| 154 |
"total_tokens": 0,
|
| 155 |
"hf_jobs_billable_seconds_estimate": 0,
|
|
|
|
| 156 |
}
|
| 157 |
|
| 158 |
|
|
@@ -178,16 +180,8 @@ def aggregate_usage_events(
|
|
| 178 |
events: list[dict[str, Any]],
|
| 179 |
*,
|
| 180 |
session_id: str | None = None,
|
| 181 |
-
window_start: datetime | None = None,
|
| 182 |
-
window_end: datetime | None = None,
|
| 183 |
-
timezone: str | None = None,
|
| 184 |
) -> dict[str, Any]:
|
| 185 |
-
bucket = _empty_bucket(
|
| 186 |
-
session_id=session_id,
|
| 187 |
-
window_start=window_start,
|
| 188 |
-
window_end=window_end,
|
| 189 |
-
timezone=timezone,
|
| 190 |
-
)
|
| 191 |
for event in events:
|
| 192 |
event_type = event.get("event_type")
|
| 193 |
data = event.get("data") or {}
|
|
@@ -217,16 +211,114 @@ def aggregate_usage_events(
|
|
| 217 |
bucket["hf_jobs_billable_seconds_estimate"] += _coerce_int(
|
| 218 |
data.get("billable_seconds_estimate") or data.get("wall_time_s")
|
| 219 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 220 |
|
| 221 |
bucket["inference_usd"] = round(bucket["inference_usd"], 6)
|
| 222 |
bucket["hf_jobs_estimated_usd"] = round(bucket["hf_jobs_estimated_usd"], 6)
|
|
|
|
| 223 |
bucket["total_usd"] = round(
|
| 224 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 225 |
6,
|
| 226 |
)
|
| 227 |
return bucket
|
| 228 |
|
| 229 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
def _account_bucket_from_billing_usage(
|
| 231 |
payload: dict[str, Any] | None,
|
| 232 |
*,
|
|
@@ -474,15 +566,12 @@ async def _build_hf_account_usage(
|
|
| 474 |
session_id: str | None,
|
| 475 |
timezone: str,
|
| 476 |
now_utc: datetime,
|
| 477 |
-
today_start: datetime,
|
| 478 |
month_start: datetime,
|
| 479 |
-
include_rollups: bool = True,
|
| 480 |
) -> dict[str, Any]:
|
| 481 |
account_usage: dict[str, Any] = {
|
| 482 |
"source": "hf_billing_usage_v2",
|
| 483 |
"available": False,
|
| 484 |
"current_session": None,
|
| 485 |
-
"today": None,
|
| 486 |
"month": None,
|
| 487 |
"inference_providers_credits": None,
|
| 488 |
}
|
|
@@ -513,13 +602,6 @@ async def _build_hf_account_usage(
|
|
| 513 |
),
|
| 514 |
),
|
| 515 |
}
|
| 516 |
-
if include_rollups:
|
| 517 |
-
window_tasks["today"] = (
|
| 518 |
-
today_start,
|
| 519 |
-
asyncio.create_task(
|
| 520 |
-
_fetch_hf_billing_usage_v2(hf_token, start=today_start, end=now_utc)
|
| 521 |
-
),
|
| 522 |
-
)
|
| 523 |
if session_start is not None:
|
| 524 |
if baseline_month_start is not None:
|
| 525 |
window_tasks["current_session_baseline"] = (
|
|
@@ -676,12 +758,10 @@ async def build_usage_response(
|
|
| 676 |
session_id: str | None = None,
|
| 677 |
timezone_name: str | None = None,
|
| 678 |
now: datetime | None = None,
|
| 679 |
-
include_rollups: bool = True,
|
| 680 |
) -> dict[str, Any]:
|
| 681 |
windows = resolve_usage_windows(timezone_name, now=now)
|
| 682 |
timezone = str(windows["timezone"])
|
| 683 |
now_utc = windows["now_utc"]
|
| 684 |
-
today_start = windows["today_start_utc"]
|
| 685 |
month_start = windows["month_start_utc"]
|
| 686 |
|
| 687 |
session_events: list[dict[str, Any]] = []
|
|
@@ -692,32 +772,13 @@ async def build_usage_response(
|
|
| 692 |
session_id=session_id,
|
| 693 |
)
|
| 694 |
|
| 695 |
-
today_events: list[dict[str, Any]] = []
|
| 696 |
-
month_events: list[dict[str, Any]] = []
|
| 697 |
-
if include_rollups:
|
| 698 |
-
today_events = await _load_usage_events(
|
| 699 |
-
manager,
|
| 700 |
-
user_id=user_id,
|
| 701 |
-
start=today_start,
|
| 702 |
-
end=now_utc,
|
| 703 |
-
timezone_name=timezone,
|
| 704 |
-
)
|
| 705 |
-
month_events = await _load_usage_events(
|
| 706 |
-
manager,
|
| 707 |
-
user_id=user_id,
|
| 708 |
-
start=month_start,
|
| 709 |
-
end=now_utc,
|
| 710 |
-
timezone_name=timezone,
|
| 711 |
-
)
|
| 712 |
hf_account = await _build_hf_account_usage(
|
| 713 |
manager,
|
| 714 |
hf_token=hf_token,
|
| 715 |
session_id=session_id,
|
| 716 |
timezone=timezone,
|
| 717 |
now_utc=now_utc,
|
| 718 |
-
today_start=today_start,
|
| 719 |
month_start=month_start,
|
| 720 |
-
include_rollups=include_rollups,
|
| 721 |
)
|
| 722 |
|
| 723 |
return {
|
|
@@ -730,18 +791,6 @@ async def build_usage_response(
|
|
| 730 |
if session_id
|
| 731 |
else None
|
| 732 |
),
|
| 733 |
-
"today": aggregate_usage_events(
|
| 734 |
-
today_events,
|
| 735 |
-
window_start=today_start,
|
| 736 |
-
window_end=now_utc,
|
| 737 |
-
timezone=timezone,
|
| 738 |
-
),
|
| 739 |
-
"month": aggregate_usage_events(
|
| 740 |
-
month_events,
|
| 741 |
-
window_start=month_start,
|
| 742 |
-
window_end=now_utc,
|
| 743 |
-
timezone=timezone,
|
| 744 |
-
),
|
| 745 |
"hf_account": hf_account,
|
| 746 |
"links": {
|
| 747 |
"hf_billing": HF_BILLING_URL,
|
|
|
|
| 2 |
|
| 3 |
import asyncio
|
| 4 |
import logging
|
| 5 |
+
from datetime import UTC, datetime, timedelta
|
| 6 |
from typing import Any
|
| 7 |
from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
|
| 8 |
|
| 9 |
import httpx
|
| 10 |
|
| 11 |
+
from agent.core.cost_estimation import SPACE_PRICE_USD_PER_HOUR
|
| 12 |
+
|
| 13 |
+
USAGE_EVENT_TYPES = (
|
| 14 |
+
"llm_call",
|
| 15 |
+
"hf_job_complete",
|
| 16 |
+
"sandbox_create",
|
| 17 |
+
"sandbox_destroy",
|
| 18 |
+
)
|
| 19 |
|
| 20 |
logger = logging.getLogger(__name__)
|
| 21 |
|
|
|
|
| 119 |
*,
|
| 120 |
now: datetime | None = None,
|
| 121 |
) -> dict[str, datetime | str]:
|
| 122 |
+
"""Return UTC month window for a browser timezone."""
|
| 123 |
try:
|
| 124 |
tz = ZoneInfo(timezone_name or "UTC")
|
| 125 |
except (ZoneInfoNotFoundError, ValueError):
|
|
|
|
| 127 |
|
| 128 |
now_utc = _utc(now or datetime.now(UTC))
|
| 129 |
local_now = now_utc.astimezone(tz)
|
| 130 |
+
month_local = local_now.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
|
|
|
|
| 131 |
return {
|
| 132 |
"timezone": tz.key,
|
| 133 |
"now_utc": now_utc,
|
|
|
|
| 134 |
"month_start_utc": month_local.astimezone(UTC),
|
| 135 |
}
|
| 136 |
|
|
|
|
| 138 |
def _empty_bucket(
|
| 139 |
*,
|
| 140 |
session_id: str | None = None,
|
|
|
|
|
|
|
|
|
|
| 141 |
) -> dict[str, Any]:
|
| 142 |
return {
|
| 143 |
"session_id": session_id,
|
|
|
|
|
|
|
|
|
|
| 144 |
"total_usd": 0.0,
|
| 145 |
"inference_usd": 0.0,
|
| 146 |
"hf_jobs_estimated_usd": 0.0,
|
| 147 |
+
"sandbox_estimated_usd": 0.0,
|
| 148 |
"llm_calls": 0,
|
| 149 |
"hf_jobs_count": 0,
|
| 150 |
+
"sandbox_count": 0,
|
| 151 |
"prompt_tokens": 0,
|
| 152 |
"completion_tokens": 0,
|
| 153 |
"cache_read_tokens": 0,
|
| 154 |
"cache_creation_tokens": 0,
|
| 155 |
"total_tokens": 0,
|
| 156 |
"hf_jobs_billable_seconds_estimate": 0,
|
| 157 |
+
"sandbox_billable_seconds_estimate": 0,
|
| 158 |
}
|
| 159 |
|
| 160 |
|
|
|
|
| 180 |
events: list[dict[str, Any]],
|
| 181 |
*,
|
| 182 |
session_id: str | None = None,
|
|
|
|
|
|
|
|
|
|
| 183 |
) -> dict[str, Any]:
|
| 184 |
+
bucket = _empty_bucket(session_id=session_id)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
for event in events:
|
| 186 |
event_type = event.get("event_type")
|
| 187 |
data = event.get("data") or {}
|
|
|
|
| 211 |
bucket["hf_jobs_billable_seconds_estimate"] += _coerce_int(
|
| 212 |
data.get("billable_seconds_estimate") or data.get("wall_time_s")
|
| 213 |
)
|
| 214 |
+
elif event_type == "sandbox_destroy":
|
| 215 |
+
# Sandbox costs are paired and added after the main pass so the
|
| 216 |
+
# create event can provide hardware pricing metadata.
|
| 217 |
+
continue
|
| 218 |
+
|
| 219 |
+
_aggregate_sandbox_usage(events, bucket)
|
| 220 |
|
| 221 |
bucket["inference_usd"] = round(bucket["inference_usd"], 6)
|
| 222 |
bucket["hf_jobs_estimated_usd"] = round(bucket["hf_jobs_estimated_usd"], 6)
|
| 223 |
+
bucket["sandbox_estimated_usd"] = round(bucket["sandbox_estimated_usd"], 6)
|
| 224 |
bucket["total_usd"] = round(
|
| 225 |
+
(
|
| 226 |
+
bucket["inference_usd"]
|
| 227 |
+
+ bucket["hf_jobs_estimated_usd"]
|
| 228 |
+
+ bucket["sandbox_estimated_usd"]
|
| 229 |
+
),
|
| 230 |
6,
|
| 231 |
)
|
| 232 |
return bucket
|
| 233 |
|
| 234 |
|
| 235 |
+
def _event_sort_key(
|
| 236 |
+
indexed_event: tuple[int, dict[str, Any]],
|
| 237 |
+
) -> tuple[bool, datetime, int]:
|
| 238 |
+
index, event = indexed_event
|
| 239 |
+
created_at = event_created_at(event)
|
| 240 |
+
return (
|
| 241 |
+
created_at is None,
|
| 242 |
+
created_at or datetime.min.replace(tzinfo=UTC),
|
| 243 |
+
index,
|
| 244 |
+
)
|
| 245 |
+
|
| 246 |
+
|
| 247 |
+
def _sandbox_id(event: dict[str, Any]) -> str | None:
|
| 248 |
+
data = event.get("data") or {}
|
| 249 |
+
sandbox_id = data.get("sandbox_id")
|
| 250 |
+
return sandbox_id if isinstance(sandbox_id, str) and sandbox_id else None
|
| 251 |
+
|
| 252 |
+
|
| 253 |
+
def _sandbox_duration_seconds(
|
| 254 |
+
create_event: dict[str, Any],
|
| 255 |
+
destroy_event: dict[str, Any],
|
| 256 |
+
) -> int:
|
| 257 |
+
create_data = create_event.get("data") or {}
|
| 258 |
+
destroy_data = destroy_event.get("data") or {}
|
| 259 |
+
lifetime_s = _coerce_int(destroy_data.get("lifetime_s"))
|
| 260 |
+
|
| 261 |
+
if lifetime_s > 0:
|
| 262 |
+
# Telemetry starts the lifetime clock before create latency elapses.
|
| 263 |
+
return lifetime_s
|
| 264 |
+
|
| 265 |
+
create_at = event_created_at(create_event)
|
| 266 |
+
destroy_at = event_created_at(destroy_event)
|
| 267 |
+
if create_at is None or destroy_at is None:
|
| 268 |
+
return 0
|
| 269 |
+
create_latency_s = max(0, _coerce_int(create_data.get("create_latency_s")))
|
| 270 |
+
interval_start = create_at - timedelta(seconds=create_latency_s)
|
| 271 |
+
if destroy_at <= interval_start:
|
| 272 |
+
return 0
|
| 273 |
+
return int((destroy_at - interval_start).total_seconds())
|
| 274 |
+
|
| 275 |
+
|
| 276 |
+
def _aggregate_sandbox_usage(
|
| 277 |
+
events: list[dict[str, Any]],
|
| 278 |
+
bucket: dict[str, Any],
|
| 279 |
+
) -> None:
|
| 280 |
+
lifecycle_events = [
|
| 281 |
+
(index, event)
|
| 282 |
+
for index, event in enumerate(events)
|
| 283 |
+
if event.get("event_type") in {"sandbox_create", "sandbox_destroy"}
|
| 284 |
+
]
|
| 285 |
+
ordered_events = [
|
| 286 |
+
event
|
| 287 |
+
for _, event in sorted(
|
| 288 |
+
lifecycle_events,
|
| 289 |
+
key=_event_sort_key,
|
| 290 |
+
)
|
| 291 |
+
]
|
| 292 |
+
active_creates: dict[str, dict[str, Any]] = {}
|
| 293 |
+
|
| 294 |
+
for event in ordered_events:
|
| 295 |
+
event_type = event.get("event_type")
|
| 296 |
+
sandbox_id = _sandbox_id(event)
|
| 297 |
+
if sandbox_id is None:
|
| 298 |
+
continue
|
| 299 |
+
|
| 300 |
+
if event_type == "sandbox_create":
|
| 301 |
+
active_creates[sandbox_id] = event
|
| 302 |
+
continue
|
| 303 |
+
|
| 304 |
+
if event_type != "sandbox_destroy":
|
| 305 |
+
continue
|
| 306 |
+
|
| 307 |
+
create_event = active_creates.pop(sandbox_id, None)
|
| 308 |
+
if create_event is None:
|
| 309 |
+
continue
|
| 310 |
+
|
| 311 |
+
create_data = create_event.get("data") or {}
|
| 312 |
+
hardware = str(create_data.get("hardware") or "cpu-basic")
|
| 313 |
+
price_usd_per_hour = SPACE_PRICE_USD_PER_HOUR.get(hardware, 0.0)
|
| 314 |
+
seconds = _sandbox_duration_seconds(create_event, event)
|
| 315 |
+
|
| 316 |
+
bucket["sandbox_count"] += 1
|
| 317 |
+
if price_usd_per_hour > 0:
|
| 318 |
+
bucket["sandbox_billable_seconds_estimate"] += seconds
|
| 319 |
+
bucket["sandbox_estimated_usd"] += price_usd_per_hour * (seconds / 3600)
|
| 320 |
+
|
| 321 |
+
|
| 322 |
def _account_bucket_from_billing_usage(
|
| 323 |
payload: dict[str, Any] | None,
|
| 324 |
*,
|
|
|
|
| 566 |
session_id: str | None,
|
| 567 |
timezone: str,
|
| 568 |
now_utc: datetime,
|
|
|
|
| 569 |
month_start: datetime,
|
|
|
|
| 570 |
) -> dict[str, Any]:
|
| 571 |
account_usage: dict[str, Any] = {
|
| 572 |
"source": "hf_billing_usage_v2",
|
| 573 |
"available": False,
|
| 574 |
"current_session": None,
|
|
|
|
| 575 |
"month": None,
|
| 576 |
"inference_providers_credits": None,
|
| 577 |
}
|
|
|
|
| 602 |
),
|
| 603 |
),
|
| 604 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 605 |
if session_start is not None:
|
| 606 |
if baseline_month_start is not None:
|
| 607 |
window_tasks["current_session_baseline"] = (
|
|
|
|
| 758 |
session_id: str | None = None,
|
| 759 |
timezone_name: str | None = None,
|
| 760 |
now: datetime | None = None,
|
|
|
|
| 761 |
) -> dict[str, Any]:
|
| 762 |
windows = resolve_usage_windows(timezone_name, now=now)
|
| 763 |
timezone = str(windows["timezone"])
|
| 764 |
now_utc = windows["now_utc"]
|
|
|
|
| 765 |
month_start = windows["month_start_utc"]
|
| 766 |
|
| 767 |
session_events: list[dict[str, Any]] = []
|
|
|
|
| 772 |
session_id=session_id,
|
| 773 |
)
|
| 774 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 775 |
hf_account = await _build_hf_account_usage(
|
| 776 |
manager,
|
| 777 |
hf_token=hf_token,
|
| 778 |
session_id=session_id,
|
| 779 |
timezone=timezone,
|
| 780 |
now_utc=now_utc,
|
|
|
|
| 781 |
month_start=month_start,
|
|
|
|
| 782 |
)
|
| 783 |
|
| 784 |
return {
|
|
|
|
| 791 |
if session_id
|
| 792 |
else None
|
| 793 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 794 |
"hf_account": hf_account,
|
| 795 |
"links": {
|
| 796 |
"hf_billing": HF_BILLING_URL,
|
frontend/src/components/UsageMeter.tsx
CHANGED
|
@@ -125,6 +125,10 @@ function AccountUsageSection({
|
|
| 125 |
useJobEstimate ? telemetry?.hf_jobs_estimated_usd : account?.hf_jobs_usd,
|
| 126 |
)}
|
| 127 |
/>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
<UsageRow label="LLM calls" value={formatCount(telemetry?.llm_calls)} />
|
| 129 |
<UsageRow
|
| 130 |
label="Input tokens"
|
|
@@ -179,7 +183,11 @@ export default function UsageMeter() {
|
|
| 179 |
void fetchUsage(activeSessionId);
|
| 180 |
}, [activeSessionId, fetchUsage]);
|
| 181 |
|
| 182 |
-
const
|
|
|
|
|
|
|
|
|
|
|
|
|
| 183 |
const links = useMemo(() => usage?.links ?? {}, [usage?.links]);
|
| 184 |
const billingMessage = billingUnavailableMessage(usage?.hf_account?.error);
|
| 185 |
const open = Boolean(anchorEl);
|
|
|
|
| 125 |
useJobEstimate ? telemetry?.hf_jobs_estimated_usd : account?.hf_jobs_usd,
|
| 126 |
)}
|
| 127 |
/>
|
| 128 |
+
<UsageRow
|
| 129 |
+
label="HF Sandboxes"
|
| 130 |
+
value={formatUsd(telemetry?.sandbox_estimated_usd)}
|
| 131 |
+
/>
|
| 132 |
<UsageRow label="LLM calls" value={formatCount(telemetry?.llm_calls)} />
|
| 133 |
<UsageRow
|
| 134 |
label="Input tokens"
|
|
|
|
| 183 |
void fetchUsage(activeSessionId);
|
| 184 |
}, [activeSessionId, fetchUsage]);
|
| 185 |
|
| 186 |
+
const accountSessionTotal = usage?.hf_account?.current_session?.total_usd;
|
| 187 |
+
const sessionTotal =
|
| 188 |
+
accountSessionTotal == null
|
| 189 |
+
? usage?.session?.total_usd
|
| 190 |
+
: accountSessionTotal + (usage?.session?.sandbox_estimated_usd ?? 0);
|
| 191 |
const links = useMemo(() => usage?.links ?? {}, [usage?.links]);
|
| 192 |
const billingMessage = billingUnavailableMessage(usage?.hf_account?.error);
|
| 193 |
const open = Boolean(anchorEl);
|
frontend/src/hooks/useAgentChat.ts
CHANGED
|
@@ -741,7 +741,7 @@ export function useAgentChat({ sessionId, isActive, isProcessing = false, onRead
|
|
| 741 |
const state = event.data?.state as string;
|
| 742 |
const toolName = event.data?.tool as string;
|
| 743 |
if (state === 'running' && toolName) sideChannel.onToolRunning(toolName);
|
| 744 |
-
} else if (et === 'llm_call' || et === 'hf_job_complete') {
|
| 745 |
sideChannel.onUsageEvent(et, (event.data || {}) as Record<string, unknown>);
|
| 746 |
} else if (et === 'turn_complete' || et === 'error' || et === 'interrupted') {
|
| 747 |
sideChannel.onProcessingDone();
|
|
|
|
| 741 |
const state = event.data?.state as string;
|
| 742 |
const toolName = event.data?.tool as string;
|
| 743 |
if (state === 'running' && toolName) sideChannel.onToolRunning(toolName);
|
| 744 |
+
} else if (et === 'llm_call' || et === 'hf_job_complete' || et === 'sandbox_destroy') {
|
| 745 |
sideChannel.onUsageEvent(et, (event.data || {}) as Record<string, unknown>);
|
| 746 |
} else if (et === 'turn_complete' || et === 'error' || et === 'interrupted') {
|
| 747 |
sideChannel.onProcessingDone();
|
frontend/src/lib/sse-chat-transport.ts
CHANGED
|
@@ -39,7 +39,7 @@ export interface SideChannelCallbacks {
|
|
| 39 |
onToolOutputPanel: (tool: string, toolCallId: string, output: string, success: boolean) => void;
|
| 40 |
onStreaming: () => void;
|
| 41 |
onToolRunning: (toolName: string, description?: string) => void;
|
| 42 |
-
onUsageEvent: (eventType: 'llm_call' | 'hf_job_complete', data: Record<string, unknown>) => void;
|
| 43 |
onInterrupted: () => void;
|
| 44 |
onRecoverMessages: (context: MessageRecoveryContext) => Promise<boolean>;
|
| 45 |
}
|
|
@@ -375,6 +375,7 @@ function createEventToChunkStream(sideChannel: SideChannelCallbacks): TransformS
|
|
| 375 |
|
| 376 |
case 'llm_call':
|
| 377 |
case 'hf_job_complete':
|
|
|
|
| 378 |
sideChannel.onUsageEvent(event.event_type, event.data || {});
|
| 379 |
break;
|
| 380 |
|
|
|
|
| 39 |
onToolOutputPanel: (tool: string, toolCallId: string, output: string, success: boolean) => void;
|
| 40 |
onStreaming: () => void;
|
| 41 |
onToolRunning: (toolName: string, description?: string) => void;
|
| 42 |
+
onUsageEvent: (eventType: 'llm_call' | 'hf_job_complete' | 'sandbox_destroy', data: Record<string, unknown>) => void;
|
| 43 |
onInterrupted: () => void;
|
| 44 |
onRecoverMessages: (context: MessageRecoveryContext) => Promise<boolean>;
|
| 45 |
}
|
|
|
|
| 375 |
|
| 376 |
case 'llm_call':
|
| 377 |
case 'hf_job_complete':
|
| 378 |
+
case 'sandbox_destroy':
|
| 379 |
sideChannel.onUsageEvent(event.event_type, event.data || {});
|
| 380 |
break;
|
| 381 |
|
frontend/src/store/usageStore.ts
CHANGED
|
@@ -3,20 +3,20 @@ import { apiFetch } from '@/utils/api';
|
|
| 3 |
|
| 4 |
export interface UsageBucket {
|
| 5 |
session_id?: string | null;
|
| 6 |
-
window_start?: string | null;
|
| 7 |
-
window_end?: string | null;
|
| 8 |
-
timezone?: string | null;
|
| 9 |
total_usd: number;
|
| 10 |
inference_usd: number;
|
| 11 |
hf_jobs_estimated_usd: number;
|
|
|
|
| 12 |
llm_calls: number;
|
| 13 |
hf_jobs_count: number;
|
|
|
|
| 14 |
prompt_tokens: number;
|
| 15 |
completion_tokens: number;
|
| 16 |
cache_read_tokens: number;
|
| 17 |
cache_creation_tokens: number;
|
| 18 |
total_tokens: number;
|
| 19 |
hf_jobs_billable_seconds_estimate: number;
|
|
|
|
| 20 |
}
|
| 21 |
|
| 22 |
export interface HfAccountUsageBucket {
|
|
@@ -46,7 +46,6 @@ export interface HfAccountUsage {
|
|
| 46 |
available: boolean;
|
| 47 |
error?: string | null;
|
| 48 |
current_session: HfAccountUsageBucket | null;
|
| 49 |
-
today: HfAccountUsageBucket | null;
|
| 50 |
month: HfAccountUsageBucket | null;
|
| 51 |
inference_providers_credits: HfInferenceProvidersCredits | null;
|
| 52 |
}
|
|
@@ -57,13 +56,11 @@ export interface UsageResponse {
|
|
| 57 |
generated_at: string;
|
| 58 |
timezone: string;
|
| 59 |
session: UsageBucket | null;
|
| 60 |
-
today: UsageBucket;
|
| 61 |
-
month: UsageBucket;
|
| 62 |
hf_account?: HfAccountUsage | null;
|
| 63 |
links: Record<string, string>;
|
| 64 |
}
|
| 65 |
|
| 66 |
-
type UsageEventType = 'llm_call' | 'hf_job_complete';
|
| 67 |
|
| 68 |
interface UsageStore {
|
| 69 |
usage: UsageResponse | null;
|
|
@@ -93,7 +90,6 @@ function usageUrl(sessionId?: string | null): string {
|
|
| 93 |
const params = new URLSearchParams();
|
| 94 |
const timezone = Intl.DateTimeFormat().resolvedOptions().timeZone || 'UTC';
|
| 95 |
params.set('tz', timezone);
|
| 96 |
-
params.set('include_rollups', 'false');
|
| 97 |
if (sessionId) params.set('session_id', sessionId);
|
| 98 |
return `/api/usage?${params.toString()}`;
|
| 99 |
}
|
|
@@ -129,7 +125,11 @@ function applyEventToBucket(
|
|
| 129 |
intValue(data.billable_seconds_estimate) || intValue(data.wall_time_s);
|
| 130 |
}
|
| 131 |
|
| 132 |
-
next.total_usd = roundUsd(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
return next;
|
| 134 |
}
|
| 135 |
|
|
@@ -165,6 +165,11 @@ export const useUsageStore = create<UsageStore>()((set, get) => ({
|
|
| 165 |
},
|
| 166 |
|
| 167 |
applyUsageEvent: (sessionId, eventType, data) => {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
const current = get().usage;
|
| 169 |
if (!current) return;
|
| 170 |
set({
|
|
@@ -174,8 +179,6 @@ export const useUsageStore = create<UsageStore>()((set, get) => ({
|
|
| 174 |
current.session?.session_id === sessionId
|
| 175 |
? applyEventToBucket(current.session, eventType, data)
|
| 176 |
: current.session,
|
| 177 |
-
today: applyEventToBucket(current.today, eventType, data) ?? current.today,
|
| 178 |
-
month: applyEventToBucket(current.month, eventType, data) ?? current.month,
|
| 179 |
},
|
| 180 |
});
|
| 181 |
},
|
|
|
|
| 3 |
|
| 4 |
export interface UsageBucket {
|
| 5 |
session_id?: string | null;
|
|
|
|
|
|
|
|
|
|
| 6 |
total_usd: number;
|
| 7 |
inference_usd: number;
|
| 8 |
hf_jobs_estimated_usd: number;
|
| 9 |
+
sandbox_estimated_usd: number;
|
| 10 |
llm_calls: number;
|
| 11 |
hf_jobs_count: number;
|
| 12 |
+
sandbox_count: number;
|
| 13 |
prompt_tokens: number;
|
| 14 |
completion_tokens: number;
|
| 15 |
cache_read_tokens: number;
|
| 16 |
cache_creation_tokens: number;
|
| 17 |
total_tokens: number;
|
| 18 |
hf_jobs_billable_seconds_estimate: number;
|
| 19 |
+
sandbox_billable_seconds_estimate: number;
|
| 20 |
}
|
| 21 |
|
| 22 |
export interface HfAccountUsageBucket {
|
|
|
|
| 46 |
available: boolean;
|
| 47 |
error?: string | null;
|
| 48 |
current_session: HfAccountUsageBucket | null;
|
|
|
|
| 49 |
month: HfAccountUsageBucket | null;
|
| 50 |
inference_providers_credits: HfInferenceProvidersCredits | null;
|
| 51 |
}
|
|
|
|
| 56 |
generated_at: string;
|
| 57 |
timezone: string;
|
| 58 |
session: UsageBucket | null;
|
|
|
|
|
|
|
| 59 |
hf_account?: HfAccountUsage | null;
|
| 60 |
links: Record<string, string>;
|
| 61 |
}
|
| 62 |
|
| 63 |
+
type UsageEventType = 'llm_call' | 'hf_job_complete' | 'sandbox_destroy';
|
| 64 |
|
| 65 |
interface UsageStore {
|
| 66 |
usage: UsageResponse | null;
|
|
|
|
| 90 |
const params = new URLSearchParams();
|
| 91 |
const timezone = Intl.DateTimeFormat().resolvedOptions().timeZone || 'UTC';
|
| 92 |
params.set('tz', timezone);
|
|
|
|
| 93 |
if (sessionId) params.set('session_id', sessionId);
|
| 94 |
return `/api/usage?${params.toString()}`;
|
| 95 |
}
|
|
|
|
| 125 |
intValue(data.billable_seconds_estimate) || intValue(data.wall_time_s);
|
| 126 |
}
|
| 127 |
|
| 128 |
+
next.total_usd = roundUsd(
|
| 129 |
+
next.inference_usd +
|
| 130 |
+
next.hf_jobs_estimated_usd +
|
| 131 |
+
(next.sandbox_estimated_usd ?? 0),
|
| 132 |
+
);
|
| 133 |
return next;
|
| 134 |
}
|
| 135 |
|
|
|
|
| 165 |
},
|
| 166 |
|
| 167 |
applyUsageEvent: (sessionId, eventType, data) => {
|
| 168 |
+
if (eventType === 'sandbox_destroy') {
|
| 169 |
+
void get().fetchUsage(sessionId);
|
| 170 |
+
return;
|
| 171 |
+
}
|
| 172 |
+
|
| 173 |
const current = get().usage;
|
| 174 |
if (!current) return;
|
| 175 |
set({
|
|
|
|
| 179 |
current.session?.session_id === sessionId
|
| 180 |
? applyEventToBucket(current.session, eventType, data)
|
| 181 |
: current.session,
|
|
|
|
|
|
|
| 182 |
},
|
| 183 |
});
|
| 184 |
},
|
frontend/src/types/events.ts
CHANGED
|
@@ -15,6 +15,8 @@ export type EventType =
|
|
| 15 |
| 'tool_state_change'
|
| 16 |
| 'llm_call'
|
| 17 |
| 'hf_job_complete'
|
|
|
|
|
|
|
| 18 |
| 'turn_complete'
|
| 19 |
| 'compacted'
|
| 20 |
| 'error'
|
|
|
|
| 15 |
| 'tool_state_change'
|
| 16 |
| 'llm_call'
|
| 17 |
| 'hf_job_complete'
|
| 18 |
+
| 'sandbox_create'
|
| 19 |
+
| 'sandbox_destroy'
|
| 20 |
| 'turn_complete'
|
| 21 |
| 'compacted'
|
| 22 |
| 'error'
|
tests/unit/test_session_persistence.py
CHANGED
|
@@ -212,7 +212,7 @@ async def test_load_usage_events_scopes_mongo_queries_to_current_user_and_window
|
|
| 212 |
None,
|
| 213 |
)
|
| 214 |
]
|
| 215 |
-
assert store.db.session_events.cursors[0].sort_calls == []
|
| 216 |
|
| 217 |
|
| 218 |
@pytest.mark.asyncio
|
|
|
|
| 212 |
None,
|
| 213 |
)
|
| 214 |
]
|
| 215 |
+
assert store.db.session_events.cursors[0].sort_calls == [(("created_at", 1), {})]
|
| 216 |
|
| 217 |
|
| 218 |
@pytest.mark.asyncio
|
tests/unit/test_usage.py
CHANGED
|
@@ -10,11 +10,13 @@ if str(_BACKEND_DIR) not in sys.path:
|
|
| 10 |
sys.path.insert(0, str(_BACKEND_DIR))
|
| 11 |
|
| 12 |
from usage import ( # noqa: E402
|
|
|
|
| 13 |
_account_bucket_from_billing_usage,
|
| 14 |
aggregate_usage_events,
|
| 15 |
build_usage_response,
|
| 16 |
resolve_usage_windows,
|
| 17 |
)
|
|
|
|
| 18 |
|
| 19 |
|
| 20 |
def _event(event_type, data=None, created_at="2026-06-01T12:00:00+00:00"):
|
|
@@ -25,7 +27,7 @@ def _event(event_type, data=None, created_at="2026-06-01T12:00:00+00:00"):
|
|
| 25 |
}
|
| 26 |
|
| 27 |
|
| 28 |
-
def test_aggregate_usage_events_sums_inference_and_jobs():
|
| 29 |
events = [
|
| 30 |
_event(
|
| 31 |
"llm_call",
|
|
@@ -46,6 +48,22 @@ def test_aggregate_usage_events_sums_inference_and_jobs():
|
|
| 46 |
"billable_seconds_estimate": 1800,
|
| 47 |
},
|
| 48 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
]
|
| 50 |
|
| 51 |
usage = aggregate_usage_events(events, session_id="s1")
|
|
@@ -53,15 +71,18 @@ def test_aggregate_usage_events_sums_inference_and_jobs():
|
|
| 53 |
assert usage["session_id"] == "s1"
|
| 54 |
assert usage["llm_calls"] == 2
|
| 55 |
assert usage["hf_jobs_count"] == 1
|
|
|
|
| 56 |
assert usage["prompt_tokens"] == 110
|
| 57 |
assert usage["completion_tokens"] == 50
|
| 58 |
assert usage["cache_read_tokens"] == 25
|
| 59 |
assert usage["cache_creation_tokens"] == 5
|
| 60 |
assert usage["total_tokens"] == 190
|
| 61 |
assert usage["hf_jobs_billable_seconds_estimate"] == 1800
|
|
|
|
| 62 |
assert usage["inference_usd"] == 0.375
|
| 63 |
assert usage["hf_jobs_estimated_usd"] == 1.5
|
| 64 |
-
assert usage["total_usd"] == 1.875
|
|
|
|
| 65 |
|
| 66 |
|
| 67 |
def test_aggregate_usage_events_treats_missing_costs_as_zero():
|
|
@@ -79,6 +100,84 @@ def test_aggregate_usage_events_treats_missing_costs_as_zero():
|
|
| 79 |
assert usage["total_usd"] == 0.0
|
| 80 |
|
| 81 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
def test_account_bucket_from_hf_billing_usage_v2():
|
| 83 |
usage = _account_bucket_from_billing_usage(
|
| 84 |
{
|
|
@@ -112,7 +211,6 @@ def test_usage_windows_respect_browser_timezone():
|
|
| 112 |
)
|
| 113 |
|
| 114 |
assert windows["timezone"] == "America/Los_Angeles"
|
| 115 |
-
assert windows["today_start_utc"] == datetime(2026, 6, 1, 7, 0, tzinfo=UTC)
|
| 116 |
assert windows["month_start_utc"] == datetime(2026, 6, 1, 7, 0, tzinfo=UTC)
|
| 117 |
|
| 118 |
|
|
@@ -162,7 +260,7 @@ def _agent_session(session_id, user_id, events):
|
|
| 162 |
|
| 163 |
|
| 164 |
@pytest.mark.asyncio
|
| 165 |
-
async def test_runtime_usage_excludes_other_users():
|
| 166 |
manager = _Manager(
|
| 167 |
{
|
| 168 |
"owner-session": _agent_session(
|
|
@@ -186,9 +284,7 @@ async def test_runtime_usage_excludes_other_users():
|
|
| 186 |
now=datetime(2026, 6, 1, 13, 0, tzinfo=UTC),
|
| 187 |
)
|
| 188 |
|
| 189 |
-
assert usage["
|
| 190 |
-
assert usage["today"]["inference_usd"] == 0.5
|
| 191 |
-
assert usage["month"]["inference_usd"] == 0.5
|
| 192 |
|
| 193 |
|
| 194 |
@pytest.mark.asyncio
|
|
@@ -219,11 +315,10 @@ async def test_runtime_usage_includes_requested_session_total():
|
|
| 219 |
|
| 220 |
assert usage["session"]["session_id"] == "s1"
|
| 221 |
assert usage["session"]["inference_usd"] == 0.25
|
| 222 |
-
assert usage["today"]["inference_usd"] == 0.0
|
| 223 |
|
| 224 |
|
| 225 |
@pytest.mark.asyncio
|
| 226 |
-
async def test_runtime_usage_interprets_naive_timestamps_in_browser_timezone():
|
| 227 |
manager = _Manager(
|
| 228 |
{
|
| 229 |
"s1": _agent_session(
|
|
@@ -249,9 +344,7 @@ async def test_runtime_usage_interprets_naive_timestamps_in_browser_timezone():
|
|
| 249 |
)
|
| 250 |
|
| 251 |
assert usage["session"]["llm_calls"] == 1
|
| 252 |
-
assert usage["
|
| 253 |
-
assert usage["month"]["llm_calls"] == 1
|
| 254 |
-
assert usage["today"]["total_tokens"] == 42
|
| 255 |
|
| 256 |
|
| 257 |
@pytest.mark.asyncio
|
|
@@ -311,8 +404,6 @@ async def test_hf_account_usage_uses_usage_window_for_current_delta(monkeypatch)
|
|
| 311 |
calls.append((start, end))
|
| 312 |
if start == usage_window_started_at:
|
| 313 |
used_nano = 500_000_000
|
| 314 |
-
elif start == datetime(2026, 6, 5, 0, 0, tzinfo=UTC):
|
| 315 |
-
used_nano = 1_000_000_000
|
| 316 |
else:
|
| 317 |
used_nano = 2_000_000_000
|
| 318 |
return {
|
|
@@ -340,7 +431,6 @@ async def test_hf_account_usage_uses_usage_window_for_current_delta(monkeypatch)
|
|
| 340 |
|
| 341 |
assert usage["hf_account"]["available"] is True
|
| 342 |
assert usage["hf_account"]["current_session"]["inference_providers_usd"] == 0.5
|
| 343 |
-
assert usage["hf_account"]["today"]["inference_providers_usd"] == 1.0
|
| 344 |
assert usage["hf_account"]["month"]["inference_providers_usd"] == 2.0
|
| 345 |
assert usage["hf_account"]["inference_providers_credits"] == {
|
| 346 |
"included_usd": 2.0,
|
|
@@ -353,7 +443,6 @@ async def test_hf_account_usage_uses_usage_window_for_current_delta(monkeypatch)
|
|
| 353 |
"period_end": None,
|
| 354 |
}
|
| 355 |
assert {start for start, _ in calls} == {
|
| 356 |
-
datetime(2026, 6, 5, 0, 0, tzinfo=UTC),
|
| 357 |
datetime(2026, 6, 1, 0, 0, tzinfo=UTC),
|
| 358 |
usage_window_started_at,
|
| 359 |
}
|
|
@@ -452,7 +541,6 @@ async def test_hf_account_usage_falls_back_to_persisted_created_at(monkeypatch):
|
|
| 452 |
session_id="s1",
|
| 453 |
timezone_name="UTC",
|
| 454 |
now=datetime(2026, 6, 5, 13, 0, tzinfo=UTC),
|
| 455 |
-
include_rollups=False,
|
| 456 |
)
|
| 457 |
|
| 458 |
assert usage["hf_account"]["current_session"]["window_start"] == (
|
|
@@ -465,7 +553,7 @@ async def test_hf_account_usage_falls_back_to_persisted_created_at(monkeypatch):
|
|
| 465 |
|
| 466 |
|
| 467 |
@pytest.mark.asyncio
|
| 468 |
-
async def test_compact_usage_skips_unused_rollup_loads(monkeypatch):
|
| 469 |
session_created_at = datetime(2026, 6, 5, 12, 0, tzinfo=UTC)
|
| 470 |
store = _RecordingStore()
|
| 471 |
manager = _Manager(
|
|
@@ -503,7 +591,6 @@ async def test_compact_usage_skips_unused_rollup_loads(monkeypatch):
|
|
| 503 |
session_id="s1",
|
| 504 |
timezone_name="UTC",
|
| 505 |
now=datetime(2026, 6, 5, 13, 0, tzinfo=UTC),
|
| 506 |
-
include_rollups=False,
|
| 507 |
)
|
| 508 |
|
| 509 |
assert store.calls == [("owner", {"session_id": "s1", "start": None, "end": None})]
|
|
@@ -512,7 +599,4 @@ async def test_compact_usage_skips_unused_rollup_loads(monkeypatch):
|
|
| 512 |
session_created_at,
|
| 513 |
}
|
| 514 |
assert datetime(2026, 6, 5, 0, 0, tzinfo=UTC) not in billing_starts
|
| 515 |
-
assert usage["today"]["llm_calls"] == 0
|
| 516 |
-
assert usage["month"]["llm_calls"] == 0
|
| 517 |
-
assert usage["hf_account"]["today"] is None
|
| 518 |
assert usage["hf_account"]["month"]["inference_providers_usd"] == 0.0
|
|
|
|
| 10 |
sys.path.insert(0, str(_BACKEND_DIR))
|
| 11 |
|
| 12 |
from usage import ( # noqa: E402
|
| 13 |
+
USAGE_EVENT_TYPES,
|
| 14 |
_account_bucket_from_billing_usage,
|
| 15 |
aggregate_usage_events,
|
| 16 |
build_usage_response,
|
| 17 |
resolve_usage_windows,
|
| 18 |
)
|
| 19 |
+
from agent.core import session_persistence # noqa: E402
|
| 20 |
|
| 21 |
|
| 22 |
def _event(event_type, data=None, created_at="2026-06-01T12:00:00+00:00"):
|
|
|
|
| 27 |
}
|
| 28 |
|
| 29 |
|
| 30 |
+
def test_aggregate_usage_events_sums_inference_jobs_and_sandboxes():
|
| 31 |
events = [
|
| 32 |
_event(
|
| 33 |
"llm_call",
|
|
|
|
| 48 |
"billable_seconds_estimate": 1800,
|
| 49 |
},
|
| 50 |
),
|
| 51 |
+
_event(
|
| 52 |
+
"sandbox_create",
|
| 53 |
+
{
|
| 54 |
+
"sandbox_id": "alice/sandbox-12345678",
|
| 55 |
+
"hardware": "cpu-upgrade",
|
| 56 |
+
},
|
| 57 |
+
created_at="2026-06-01T12:30:00+00:00",
|
| 58 |
+
),
|
| 59 |
+
_event(
|
| 60 |
+
"sandbox_destroy",
|
| 61 |
+
{
|
| 62 |
+
"sandbox_id": "alice/sandbox-12345678",
|
| 63 |
+
"lifetime_s": 3600,
|
| 64 |
+
},
|
| 65 |
+
created_at="2026-06-01T13:30:00+00:00",
|
| 66 |
+
),
|
| 67 |
]
|
| 68 |
|
| 69 |
usage = aggregate_usage_events(events, session_id="s1")
|
|
|
|
| 71 |
assert usage["session_id"] == "s1"
|
| 72 |
assert usage["llm_calls"] == 2
|
| 73 |
assert usage["hf_jobs_count"] == 1
|
| 74 |
+
assert usage["sandbox_count"] == 1
|
| 75 |
assert usage["prompt_tokens"] == 110
|
| 76 |
assert usage["completion_tokens"] == 50
|
| 77 |
assert usage["cache_read_tokens"] == 25
|
| 78 |
assert usage["cache_creation_tokens"] == 5
|
| 79 |
assert usage["total_tokens"] == 190
|
| 80 |
assert usage["hf_jobs_billable_seconds_estimate"] == 1800
|
| 81 |
+
assert usage["sandbox_billable_seconds_estimate"] == 3600
|
| 82 |
assert usage["inference_usd"] == 0.375
|
| 83 |
assert usage["hf_jobs_estimated_usd"] == 1.5
|
| 84 |
+
assert usage["sandbox_estimated_usd"] == 0.05
|
| 85 |
+
assert usage["total_usd"] == 1.925
|
| 86 |
|
| 87 |
|
| 88 |
def test_aggregate_usage_events_treats_missing_costs_as_zero():
|
|
|
|
| 100 |
assert usage["total_usd"] == 0.0
|
| 101 |
|
| 102 |
|
| 103 |
+
def test_aggregate_usage_events_ignores_active_sandbox_before_destroy():
|
| 104 |
+
usage = aggregate_usage_events(
|
| 105 |
+
[
|
| 106 |
+
_event(
|
| 107 |
+
"sandbox_create",
|
| 108 |
+
{
|
| 109 |
+
"sandbox_id": "alice/sandbox-12345678",
|
| 110 |
+
"hardware": "a100-large",
|
| 111 |
+
},
|
| 112 |
+
)
|
| 113 |
+
]
|
| 114 |
+
)
|
| 115 |
+
|
| 116 |
+
assert usage["sandbox_count"] == 0
|
| 117 |
+
assert usage["sandbox_estimated_usd"] == 0.0
|
| 118 |
+
assert usage["sandbox_billable_seconds_estimate"] == 0
|
| 119 |
+
assert usage["total_usd"] == 0.0
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
def test_aggregate_usage_events_counts_cpu_basic_sandbox_as_free():
|
| 123 |
+
usage = aggregate_usage_events(
|
| 124 |
+
[
|
| 125 |
+
_event(
|
| 126 |
+
"sandbox_create",
|
| 127 |
+
{
|
| 128 |
+
"sandbox_id": "alice/sandbox-12345678",
|
| 129 |
+
"hardware": "cpu-basic",
|
| 130 |
+
},
|
| 131 |
+
),
|
| 132 |
+
_event(
|
| 133 |
+
"sandbox_destroy",
|
| 134 |
+
{
|
| 135 |
+
"sandbox_id": "alice/sandbox-12345678",
|
| 136 |
+
"lifetime_s": 3600,
|
| 137 |
+
},
|
| 138 |
+
),
|
| 139 |
+
]
|
| 140 |
+
)
|
| 141 |
+
|
| 142 |
+
assert usage["sandbox_count"] == 1
|
| 143 |
+
assert usage["sandbox_estimated_usd"] == 0.0
|
| 144 |
+
assert usage["sandbox_billable_seconds_estimate"] == 0
|
| 145 |
+
assert usage["total_usd"] == 0.0
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
def test_aggregate_usage_events_falls_back_to_sandbox_timestamps():
|
| 149 |
+
usage = aggregate_usage_events(
|
| 150 |
+
[
|
| 151 |
+
_event(
|
| 152 |
+
"sandbox_create",
|
| 153 |
+
{
|
| 154 |
+
"sandbox_id": "alice/sandbox-12345678",
|
| 155 |
+
"hardware": "t4-small",
|
| 156 |
+
},
|
| 157 |
+
created_at="2026-06-01T12:00:00+00:00",
|
| 158 |
+
),
|
| 159 |
+
_event(
|
| 160 |
+
"sandbox_destroy",
|
| 161 |
+
{"sandbox_id": "alice/sandbox-12345678"},
|
| 162 |
+
created_at="2026-06-01T12:30:00+00:00",
|
| 163 |
+
),
|
| 164 |
+
]
|
| 165 |
+
)
|
| 166 |
+
|
| 167 |
+
assert usage["sandbox_count"] == 1
|
| 168 |
+
assert usage["sandbox_billable_seconds_estimate"] == 1800
|
| 169 |
+
assert usage["sandbox_estimated_usd"] == 0.3
|
| 170 |
+
assert usage["total_usd"] == 0.3
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
def test_usage_event_type_allowlists_include_sandbox_lifecycle():
|
| 174 |
+
assert set(USAGE_EVENT_TYPES) >= {"sandbox_create", "sandbox_destroy"}
|
| 175 |
+
assert set(session_persistence.USAGE_EVENT_TYPES) >= {
|
| 176 |
+
"sandbox_create",
|
| 177 |
+
"sandbox_destroy",
|
| 178 |
+
}
|
| 179 |
+
|
| 180 |
+
|
| 181 |
def test_account_bucket_from_hf_billing_usage_v2():
|
| 182 |
usage = _account_bucket_from_billing_usage(
|
| 183 |
{
|
|
|
|
| 211 |
)
|
| 212 |
|
| 213 |
assert windows["timezone"] == "America/Los_Angeles"
|
|
|
|
| 214 |
assert windows["month_start_utc"] == datetime(2026, 6, 1, 7, 0, tzinfo=UTC)
|
| 215 |
|
| 216 |
|
|
|
|
| 260 |
|
| 261 |
|
| 262 |
@pytest.mark.asyncio
|
| 263 |
+
async def test_usage_response_omits_app_rollups_without_session():
|
| 264 |
manager = _Manager(
|
| 265 |
{
|
| 266 |
"owner-session": _agent_session(
|
|
|
|
| 284 |
now=datetime(2026, 6, 1, 13, 0, tzinfo=UTC),
|
| 285 |
)
|
| 286 |
|
| 287 |
+
assert usage["session"] is None
|
|
|
|
|
|
|
| 288 |
|
| 289 |
|
| 290 |
@pytest.mark.asyncio
|
|
|
|
| 315 |
|
| 316 |
assert usage["session"]["session_id"] == "s1"
|
| 317 |
assert usage["session"]["inference_usd"] == 0.25
|
|
|
|
| 318 |
|
| 319 |
|
| 320 |
@pytest.mark.asyncio
|
| 321 |
+
async def test_runtime_usage_includes_requested_session_tokens():
|
| 322 |
manager = _Manager(
|
| 323 |
{
|
| 324 |
"s1": _agent_session(
|
|
|
|
| 344 |
)
|
| 345 |
|
| 346 |
assert usage["session"]["llm_calls"] == 1
|
| 347 |
+
assert usage["session"]["total_tokens"] == 42
|
|
|
|
|
|
|
| 348 |
|
| 349 |
|
| 350 |
@pytest.mark.asyncio
|
|
|
|
| 404 |
calls.append((start, end))
|
| 405 |
if start == usage_window_started_at:
|
| 406 |
used_nano = 500_000_000
|
|
|
|
|
|
|
| 407 |
else:
|
| 408 |
used_nano = 2_000_000_000
|
| 409 |
return {
|
|
|
|
| 431 |
|
| 432 |
assert usage["hf_account"]["available"] is True
|
| 433 |
assert usage["hf_account"]["current_session"]["inference_providers_usd"] == 0.5
|
|
|
|
| 434 |
assert usage["hf_account"]["month"]["inference_providers_usd"] == 2.0
|
| 435 |
assert usage["hf_account"]["inference_providers_credits"] == {
|
| 436 |
"included_usd": 2.0,
|
|
|
|
| 443 |
"period_end": None,
|
| 444 |
}
|
| 445 |
assert {start for start, _ in calls} == {
|
|
|
|
| 446 |
datetime(2026, 6, 1, 0, 0, tzinfo=UTC),
|
| 447 |
usage_window_started_at,
|
| 448 |
}
|
|
|
|
| 541 |
session_id="s1",
|
| 542 |
timezone_name="UTC",
|
| 543 |
now=datetime(2026, 6, 5, 13, 0, tzinfo=UTC),
|
|
|
|
| 544 |
)
|
| 545 |
|
| 546 |
assert usage["hf_account"]["current_session"]["window_start"] == (
|
|
|
|
| 553 |
|
| 554 |
|
| 555 |
@pytest.mark.asyncio
|
| 556 |
+
async def test_usage_response_loads_only_session_events(monkeypatch):
|
| 557 |
session_created_at = datetime(2026, 6, 5, 12, 0, tzinfo=UTC)
|
| 558 |
store = _RecordingStore()
|
| 559 |
manager = _Manager(
|
|
|
|
| 591 |
session_id="s1",
|
| 592 |
timezone_name="UTC",
|
| 593 |
now=datetime(2026, 6, 5, 13, 0, tzinfo=UTC),
|
|
|
|
| 594 |
)
|
| 595 |
|
| 596 |
assert store.calls == [("owner", {"session_id": "s1", "start": None, "end": None})]
|
|
|
|
| 599 |
session_created_at,
|
| 600 |
}
|
| 601 |
assert datetime(2026, 6, 5, 0, 0, tzinfo=UTC) not in billing_starts
|
|
|
|
|
|
|
|
|
|
| 602 |
assert usage["hf_account"]["month"]["inference_providers_usd"] == 0.0
|