|
import json |
|
from huggingface_hub import HfApi, hf_hub_url, HfFolder |
|
from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError |
|
from requests import HTTPError |
|
|
|
from app.config import HF_TOKEN |
|
|
|
|
|
def is_model_on_hub(
    model_id: str,
    revision: str,
    token: str = HF_TOKEN,
    trust_remote_code: bool = False,
    test_tokenizer: bool = True,
) -> "tuple[bool, str, dict | None]":
    """Checks if a model is on the hub.

    Args:
        model_id: repo id of the model, e.g. ``"org/model-name"``.
        revision: git revision (branch, tag, or commit hash) to check.
        token: Hugging Face access token; defaults to the configured ``HF_TOKEN``.
        trust_remote_code: whether custom model code from the repo may be executed.
        test_tokenizer: when ``trust_remote_code`` is also set, additionally try to
            load the tokenizer to verify the repo is usable.

    Returns:
        (bool, str, dict): a tuple with a boolean indicating if the model is on the hub, a string with the error message, and the model config
    """
    # No token: we cannot query the hub at all, so fail fast with guidance.
    if not token:
        return (
            False,
            "No Hugging Face token provided. Please create a read token on the Hugging Face website and add it as a secret with the name `HF_TOKEN`.",
            None,
        )

    api = HfApi(token=token)
    try:
        model_info = api.model_info(model_id, revision=revision)
        # Not all repos expose a config; fall back to None rather than raising.
        model_config = getattr(model_info, "config", None)
    except RepositoryNotFoundError:
        return False, f"Model {model_id} not found on hub", None
    except (HTTPError, GatedRepoError):
        # NOTE(review): a generic HTTPError is also reported as "gated" here —
        # presumably gated repos surface as HTTP errors; confirm against the
        # huggingface_hub version in use.
        return False, f"Model {model_id} is gated, you need to accept the license agreement first.", None

    # Optionally verify the tokenizer loads, since trust_remote_code repos can
    # ship broken custom code that only fails at load time.
    if trust_remote_code and test_tokenizer:
        # Local import keeps the heavy transformers dependency off the common path.
        from transformers import AutoTokenizer

        try:
            AutoTokenizer.from_pretrained(model_id, revision=revision, trust_remote_code=True, token=token)
        except Exception as e:
            return False, f"Could not load tokenizer for {model_id}. Error: {e}", None

    return True, "", model_config
|
|