|
import asyncio |
|
import json |
|
import re |
|
from collections import defaultdict |
|
from datetime import date |
|
from os import getenv |
|
|
|
import pandas as pd |
|
from aiolimiter import AsyncLimiter |
|
from dotenv import load_dotenv |
|
from google.cloud import translate_v2 as translate |
|
from huggingface_hub import AsyncInferenceClient, HfApi |
|
from joblib.memory import Memory |
|
from openai import AsyncOpenAI, BadRequestError |
|
from requests import HTTPError, get |
|
|
|
|
|
|
|
# Hand-picked model ids that load_models always considers, in addition to the
# popular models scraped from the rankings pages. Each id is matched against
# the "slug" field of the OpenRouter catalogue.
important_models = [
    # Meta
    "meta-llama/llama-4-maverick",
    "meta-llama/llama-3.3-70b-instruct",
    "meta-llama/llama-3.1-70b-instruct",
    "meta-llama/llama-3-70b-instruct",
    # OpenAI
    "openai/gpt-5",
    "openai/gpt-5-nano",
    "openai/gpt-4.1",
    "openai/gpt-4.1-mini",
    "openai/gpt-4.1-nano",
    "openai/gpt-4o-mini",
    "openai/gpt-4o-2024-11-20",
    "openai/gpt-oss-120b",
    # Anthropic
    "anthropic/claude-3.7-sonnet",
    "anthropic/claude-sonnet-4",
    "anthropic/claude-opus-4.1",
    # Mistral
    "mistralai/mistral-small-3.1-24b-instruct",
    "mistralai/mistral-saba",
    "mistralai/mistral-nemo",
    # Google
    "google/gemini-2.5-flash",
    "google/gemini-2.0-flash-lite-001",
    "google/gemma-3-27b-it",
    # Qwen
    "qwen/qwen3-32b",
    "qwen/qwen3-235b-a22b",
    "qwen/qwen3-30b-a3b",
    # DeepSeek
    "deepseek/deepseek-chat-v3-0324",
    "deepseek/deepseek-chat",
    # Microsoft
    "microsoft/phi-4",
    "microsoft/phi-4-multimodal-instruct",
    # Others
    "amazon/nova-micro-v1",
    "moonshotai/kimi-k2",
    "x-ai/grok-4",
]
|
|
|
# Model ids that load_models removes from the candidate set even when they
# show up among the important or popular models.
blocklist = [
    # Gemini 2.5 Pro variants
    "google/gemini-2.5-pro-preview",
    "google/gemini-2.5-pro",
    # Gemini 2.5 Flash previews
    "google/gemini-2.5-flash-preview",
    "google/gemini-2.5-flash-lite-preview",
    "google/gemini-2.5-flash-preview-04-17",
    "google/gemini-2.5-flash-preview-05-20",
    "google/gemini-2.5-flash-lite-preview-06-17",
    # Dated Gemini 2.5 Pro previews
    "google/gemini-2.5-pro-preview-06-05",
    "google/gemini-2.5-pro-preview-05-06",
    # Perplexity
    "perplexity/sonar-deep-research",
]
|
|
|
# Speech-to-text model ids in "<provider>/<model>" form; transcribe() routes on
# the provider prefix ("elevenlabs" vs. "openai"/"facebook").
transcription_models = [
    "elevenlabs/scribe_v1",
    "openai/whisper-large-v3",
]
|
|
|
# joblib-backed memoization decorator; results are stored under ".cache".
cache = Memory(location=".cache", verbose=0).cache


@cache
def get_models(date: date):
    """Download the OpenRouter model catalogue.

    Args:
        date: Not read by the body. It only takes part in the cache key, so a
            new date value triggers a fresh download.

    Returns:
        The value stored under "data" in the endpoint's JSON response.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        requests.Timeout: If the server does not respond within 30 seconds.
    """
    # Without a timeout a stalled connection would block the caller forever.
    response = get("https://openrouter.ai/api/frontend/models", timeout=30)
    # Fail with the HTTP status instead of an opaque JSON/KeyError on error pages.
    response.raise_for_status()
    return response.json()["data"]
|
|
|
|
|
def get_model(permaslug):
    """Return the first paid catalogue entry with the given permaslug.

    Args:
        permaslug: Value compared against each entry's "permaslug" field.

    Returns:
        The first entry of today's catalogue that matches, has a truthy
        "endpoint" and whose endpoint is not flagged "is_free"; None (after
        printing a notice) when there is no such entry.
    """
    catalogue = get_models(date.today())
    paid_matches = []
    for entry in catalogue:
        if entry["permaslug"] != permaslug:
            continue
        endpoint = entry["endpoint"]
        if endpoint and not endpoint["is_free"]:
            paid_matches.append(entry)

    if not paid_matches:
        print(f"no non-free model found for {permaslug}")
        return None
    return paid_matches[0]
|
|
|
|
|
# Matches text of the form \"count\":<number> ... \"model_permaslug\":\"<slug>\"
# (quotes preceded by a literal backslash), capturing the number and the slug.
_RANKING_ENTRY = re.compile(
    r"\\\"count\\\":([\d.]+).*?\\\"model_permaslug\\\":\\\"([^\\\"]+)\\\""
)


def _fetch_popular_models(url, limit):
    """Scrape a rankings page and return its `limit` highest-count models.

    Counts of slugs that share the part before ":" are summed. Slugs starting
    with "openrouter" and the "Others" bucket are ignored. Any failure is
    reported on stdout and yields an empty list (best effort).
    """
    try:
        raw = get(url, timeout=30).text
        totals = defaultdict(float)
        for count_str, model_slug in _RANKING_ENTRY.findall(raw):
            count = float(count_str)
            if model_slug.startswith("openrouter") or model_slug == "Others":
                continue
            # Fold variants such as "<slug>:<suffix>" into their base slug.
            totals[model_slug.split(":")[0]] += count
        ranked = sorted(totals.items(), key=lambda item: item[1], reverse=True)
        return [
            {"slug": slug, "count": int(count)} for slug, count in ranked[:limit]
        ]
    except Exception as e:
        # Popularity data is optional, so do not crash, but make the failure visible.
        print(f"could not load popular models from {url}: {e!r}")
        return []


@cache
def get_historical_popular_models(date: date):
    """Return up to 20 models from https://openrouter.ai/rankings.

    Args:
        date: Not read by the body; it only takes part in the cache key.

    Returns:
        A list of {"slug": str, "count": int} dicts sorted by descending
        count, or [] when the page cannot be fetched or parsed.

    NOTE: the result is cached per `date`, including an empty list returned
    after a failure.
    """
    return _fetch_popular_models("https://openrouter.ai/rankings", limit=20)


@cache
def get_current_popular_models(date: date):
    """Return up to 10 models from https://openrouter.ai/rankings?view=day.

    Args:
        date: Not read by the body; it only takes part in the cache key.

    Returns:
        A list of {"slug": str, "count": int} dicts sorted by descending
        count, or [] when the page cannot be fetched or parsed.

    NOTE: the result is cached per `date`, including an empty list returned
    after a failure.
    """
    return _fetch_popular_models("https://openrouter.ai/rankings?view=day", limit=10)
|
|
|
|
|
def get_translation_models():
    """Build the one-row model table that describes Google Translate.

    Returns:
        A DataFrame with the columns id, name, provider_name, cost, size,
        type, license and tasks, holding a single "google/translate-v2" row.
    """
    google_translate = {
        "id": "google/translate-v2",
        "name": "Google Translate",
        "provider_name": "Google",
        "cost": 20.0,
        "size": None,
        "type": "closed-source",
        "license": None,
        "tasks": ["translation_from", "translation_to"],
    }
    records = [google_translate]
    return pd.DataFrame(records)
|
|
|
|
|
# Pull variables from a .env file into the environment before any key is read.
load_dotenv()

# One limiter per upstream service: at most `max_rate` acquisitions within each
# `time_period` window.
openrouter_rate_limit = AsyncLimiter(max_rate=20, time_period=1)
elevenlabs_rate_limit = AsyncLimiter(max_rate=2, time_period=1)
huggingface_rate_limit = AsyncLimiter(max_rate=5, time_period=1)
google_rate_limit = AsyncLimiter(max_rate=10, time_period=1)

# OpenAI-compatible async client pointed at the OpenRouter API.
client = AsyncOpenAI(
    api_key=getenv("OPENROUTER_API_KEY"),
    base_url="https://openrouter.ai/api/v1",
)
|
|
|
|
|
@cache
async def complete(**kwargs) -> str | None:
    """Run one chat completion through the OpenRouter client.

    Args:
        **kwargs: Forwarded unchanged to `client.chat.completions.create`.

    Returns:
        The stripped message content of the first choice, or None when the
        request is rejected with a BadRequestError whose message contains
        "filtered".

    Raises:
        BadRequestError: For any other rejected request.
        Exception: Carrying the response when it contains no choices.
    """
    async with openrouter_rate_limit:
        try:
            response = await client.chat.completions.create(**kwargs)
        except BadRequestError as error:
            if "filtered" not in error.message:
                raise
            return None
        choices = response.choices
        if not choices:
            raise Exception(response)
        first_choice = choices[0]
        return first_choice.message.content.strip()
|
|
|
|
|
# Module-wide Google Cloud Translation (v2) client, created at import time.
translate_client = translate.Client()


def get_google_supported_languages():
    """Return the "language" value of every entry the Translate client lists."""
    available = translate_client.get_languages()
    return [entry["language"] for entry in available]
|
|
|
|
|
@cache
async def translate_google(text, source_language, target_language):
    """Translate `text` with Google Translate under the Google rate limit.

    Args:
        text: Passed as the text argument of `translate_client.translate`.
        source_language: Forwarded as `source_language`.
        target_language: Forwarded as `target_language`.

    Returns:
        The "translatedText" entry of the client's response.
    """
    async with google_rate_limit:
        # translate_client.translate is a synchronous call. Running it in a
        # worker thread keeps the event loop free for other coroutines instead
        # of serialising every translation behind a blocking HTTP request.
        response = await asyncio.to_thread(
            translate_client.translate,
            text,
            source_language=source_language,
            target_language=target_language,
        )
    return response["translatedText"]
|
|
|
|
|
@cache
async def transcribe_elevenlabs(path, model):
    """Transcribe an audio file through the ElevenLabs speech-to-text API.

    Args:
        path: Location of the audio file; it is opened in binary mode.
        model: Model id; only the part after the last "/" is sent as model_id.

    Returns:
        The `text` attribute of the API response.
    """
    model_id = model.rsplit("/", 1)[-1]
    # NOTE(review): AsyncElevenLabs is not imported anywhere in the visible
    # file, so this line raises NameError as written. Confirm the intended
    # `elevenlabs` import at the top of the module.
    elevenlabs_client = AsyncElevenLabs(api_key=getenv("ELEVENLABS_API_KEY"))
    async with elevenlabs_rate_limit:
        with open(path, "rb") as audio:
            response = await elevenlabs_client.speech_to_text.convert(
                file=audio, model_id=model_id
            )
    return response.text
|
|
|
|
|
@cache
async def transcribe_huggingface(path, model):
    """Transcribe an audio file via the Hugging Face inference client.

    Args:
        path: Passed as `audio` to `automatic_speech_recognition`.
        model: Passed as `model` to `automatic_speech_recognition`.

    Returns:
        The `text` attribute of the recognition output.
    """
    hf_client = AsyncInferenceClient(api_key=getenv("HUGGINGFACE_ACCESS_TOKEN"))
    async with huggingface_rate_limit:
        recognition = await hf_client.automatic_speech_recognition(
            audio=path, model=model
        )
    return recognition.text
|
|
|
|
|
async def transcribe(path, model="elevenlabs/scribe_v1"): |
|
provider, modelname = model.split("/") |
|
match provider: |
|
case "elevenlabs": |
|
return await transcribe_elevenlabs(path, modelname) |
|
case "openai" | "facebook": |
|
return await transcribe_huggingface(path, model) |
|
case _: |
|
raise ValueError(f"Model {model} not supported") |
|
|
|
|
|
def get_or_metadata(id):
    """Find the OpenRouter catalogue entry for a model id.

    Args:
        id: Value compared against each entry's "slug" field.

    Returns:
        The first entry of today's catalogue whose "slug" equals `id`, or None
        when there is no such entry.
    """
    for entry in get_models(date.today()):
        if entry["slug"] == id:
            return entry
    return None
|
|
|
|
|
# Module-wide Hugging Face Hub client used for model lookups.
api = HfApi()


def _format_license(raw_license):
    """Turn a license id such as "apache-2.0" into display text ("Apache 2.0")."""
    titled = raw_license.replace("-", " ").title()
    # str.title() lowercases everything after a word's first letter, which
    # turned "MIT" back into "Mit"; restore the acronym after title-casing.
    return re.sub(r"\bMit\b", "MIT", titled)


@cache
def get_hf_metadata(row):
    """Look up Hugging Face Hub metadata for an OpenRouter catalogue entry.

    Args:
        row: Mapping with the keys "hf_slug" and "slug", or a falsy value. The
            Hub id is `row["hf_slug"]` when truthy, otherwise the part of
            `row["slug"]` before the first ":".

    Returns:
        A dict with the keys hf_id, creation_date, size, type and license.
        When `row` or the derived id is falsy, or the Hub lookup raises
        HTTPError, the placeholder with type "closed-source" and None values
        is returned. Otherwise type is "open-source", size is the safetensors
        total (None when absent) and license is the formatted card license
        ("" when the card has none).
    """
    empty = {
        "hf_id": None,
        "creation_date": None,
        "size": None,
        "type": "closed-source",
        "license": None,
    }
    if not row:
        return empty
    model_id = row["hf_slug"] or row["slug"].split(":")[0]
    if not model_id:
        return empty
    try:
        info = api.model_info(model_id)
        card = info.card_data
        raw_license = getattr(card, "license", None) if card else None
        license_name = _format_license(raw_license) if raw_license else ""
        return {
            "hf_id": info.id,
            "creation_date": info.created_at,
            "size": info.safetensors.total if info.safetensors else None,
            "type": "open-source",
            "license": license_name,
        }
    except HTTPError:
        # The model is treated as closed-source when the Hub lookup fails.
        return empty
|
|
|
|
|
def get_cost(row):
    """
    Return the completion price of a model, scaled by one million.

    row: a row from the OpenRouter models dataframe; the price is read from
    row["endpoint"]["pricing"]["completion"] (presumably a per-token price, so
    the result would be a price per million tokens; confirm against the API).

    Returns the scaled price rounded to two decimals, or None when the price
    is missing or cannot be converted to a number.
    """
    try:
        price = float(row["endpoint"]["pricing"]["completion"])
    except (TypeError, KeyError, ValueError):
        # ValueError covers non-numeric price strings, which previously escaped
        # although every other malformed input already produced None.
        return None
    return round(price * 1_000_000, 2)
|
|
|
|
|
@cache
def load_models(date: date):
    """Assemble the table of models to evaluate.

    Candidates are `important_models` plus the scraped popular models, minus
    `blocklist`. Ids that the OpenRouter catalogue does not know are dropped,
    the rest is enriched with OpenRouter and Hugging Face metadata, models
    with a cost above 15.0 (or without a cost) are removed, and the Google
    Translate row from get_translation_models() is appended.

    Args:
        date: Takes part in the cache key. The nested lookups call
            `date.today()`, which is a classmethod and therefore returns the
            current date even though `date` is an instance here.

    Returns:
        A DataFrame with the columns id, name, provider_name, cost, hf_id,
        size, type, license, creation_date and tasks. The index is not reset
        after the final concat, so the appended row keeps its own label 0.
    """
    llm_tasks = (
        "translation_from",
        "translation_to",
        "classification",
        "mmlu",
        "arc",
        "truthfulqa",
        "mgsm",
    )

    popular_models = (
        get_historical_popular_models(date.today())[:20]
        + get_current_popular_models(date.today())[:10]
    )
    popular_slugs = [m["slug"] for m in popular_models]
    all_model_candidates = set(important_models + popular_slugs) - set(blocklist)

    # Look each candidate up once and reuse the result instead of querying the
    # catalogue a second time for every model that passed validation.
    found = {}
    for model_id in sorted(all_model_candidates):
        metadata = get_or_metadata(model_id)
        if metadata is not None:
            found[model_id] = metadata

    models = pd.DataFrame(list(found), columns=["id"])
    or_metadata = pd.Series(list(found.values()), index=models.index, dtype=object)
    hf_metadata = or_metadata.apply(get_hf_metadata)
    creation_date_hf = pd.to_datetime(hf_metadata.str["creation_date"]).dt.date
    creation_date_or = pd.to_datetime(
        or_metadata.str["created_at"].str.split("T").str[0]
    ).dt.date

    models = models.assign(
        # regex=False: the suffixes contain parentheses and must be removed
        # literally regardless of the installed pandas version's default.
        name=or_metadata.str["short_name"]
        .str.replace(" (free)", "", regex=False)
        .str.replace(" (self-moderated)", "", regex=False),
        provider_name=or_metadata.str["name"].str.split(": ").str[0],
        cost=or_metadata.apply(get_cost),
        hf_id=hf_metadata.str["hf_id"],
        size=hf_metadata.str["size"],
        type=hf_metadata.str["type"],
        license=hf_metadata.str["license"],
        # Prefer the Hugging Face creation date, fall back to OpenRouter's.
        creation_date=creation_date_hf.combine_first(creation_date_or),
    )

    # Comparisons with a missing cost are False, so such rows are dropped too.
    models = models[models["cost"] <= 15.0].reset_index(drop=True)
    # Give every row its own list so mutating one row's tasks cannot leak
    # into the others.
    models["tasks"] = [list(llm_tasks) for _ in range(len(models))]
    models = pd.concat([models, get_translation_models()])
    return models


# Model table built (or loaded from the cache) once at import time.
models = load_models(date.today())
|
|