|
""" |
|
For calculating cost of fireworks ai serverless inference models. |
|
""" |
|
|
|
from typing import Tuple |
|
|
|
from litellm.types.utils import Usage |
|
from litellm.utils import get_model_info |
|
|
|
|
|
|
|
|
|
def get_base_model_for_pricing(model_name: str) -> str:
    """
    Helper function for calculating fireworks ai pricing.

    Maps a model name to a pricing category using the parameter count embedded
    in the name. Matching is case-insensitive. Two name shapes are recognised:

    - mixture-of-experts, "<experts>x<billions>b" (e.g. "8x7b"): categorised by
      experts * billions
    - dense, "<billions>b" (e.g. "70b" or "1.5b"): categorised by billions

    Input:
    - model_name: str, the model name to categorise

    Returns:
    - str: model pricing category if the size maps to a known tier, else
      "fireworks-ai-default"
    """
    import re

    model_name = model_name.lower()

    # Check for MoE models in the form <number>x<number>b
    moe_match = re.search(r"(\d+)x(\d+)b", model_name)
    if moe_match is not None:
        total_billion = int(moe_match.group(1)) * int(moe_match.group(2))
        if total_billion <= 56:
            return "fireworks-ai-moe-up-to-56b"
        if total_billion <= 176:
            return "fireworks-ai-56b-to-176b"
        # An MoE above the largest MoE tier must not fall through to the dense
        # check below: that check would match the "<billions>b" suffix and
        # categorise the model by the size of a single expert.
        return "fireworks-ai-default"

    # Check for dense models in the form <number>b. The optional fractional
    # part keeps "16.5b" from being read as "5b".
    dense_match = re.search(r"(\d+(?:\.\d+)?)b", model_name)
    if dense_match is not None:
        params_billion = float(dense_match.group(1))

        # Determine the category based on the number of parameters
        if params_billion <= 16.0:
            return "fireworks-ai-up-to-16b"
        if params_billion <= 80.0:
            return "fireworks-ai-16b-80b"

    # No size found, or the size is outside every known tier
    return "fireworks-ai-default"
|
|
|
|
|
def cost_per_token(model: str, usage: Usage) -> Tuple[float, float]:
    """
    Calculates the prompt and completion cost of a fireworks ai request.

    Pricing is first looked up for `model` itself. If that lookup raises, the
    model name is mapped to a pricing category with
    `get_base_model_for_pricing` and the lookup is repeated for that category.

    Input:
        - model: str, the model name without provider prefix
        - usage: LiteLLM Usage block; its "prompt_tokens" and
          "completion_tokens" entries are read

    Returns:
        Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
    """
    provider = "fireworks_ai"

    try:
        pricing = get_model_info(model=model, custom_llm_provider=provider)
    except Exception:
        # Direct lookup failed: retry with the size-based pricing category
        pricing = get_model_info(
            model=get_base_model_for_pricing(model_name=model),
            custom_llm_provider=provider,
        )

    # Cost of the input side of the request
    prompt_tokens = usage["prompt_tokens"]
    input_rate = pricing["input_cost_per_token"]
    prompt_cost: float = prompt_tokens * input_rate

    # Cost of the output side of the request
    completion_tokens = usage["completion_tokens"]
    output_rate = pricing["output_cost_per_token"]
    completion_cost = completion_tokens * output_rate

    return prompt_cost, completion_cost
|
|