Clémentine committed · Commit 7302987 · Parent(s): 7abc6a7

Added check on tokenizer to prevent submissions which won't run
Files changed:
- src/submission/check_validity.py  +18 -2
- src/submission/submit.py  +2 -2
src/submission/check_validity.py
CHANGED
@@ -8,6 +8,7 @@ import huggingface_hub
 from huggingface_hub import ModelCard
 from huggingface_hub.hf_api import ModelInfo
 from transformers import AutoConfig
+from transformers.models.auto.tokenization_auto import tokenizer_class_from_name, get_tokenizer_config
 
 from src.envs import HAS_HIGHER_RATE_LIMIT
 
@@ -36,9 +37,24 @@ def check_model_card(repo_id: str) -> tuple[bool, str]:
     return True, ""
 
 
-def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=False) -> tuple[bool, str]:
+def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False) -> tuple[bool, str]:
     try:
         config = AutoConfig.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
+        if test_tokenizer:
+            tokenizer_config = get_tokenizer_config(model_name)
+            if tokenizer_config is not None:
+                tokenizer_class_candidate = tokenizer_config.get("tokenizer_class", None)
+            else:
+                tokenizer_class_candidate = config.tokenizer_class
+
+
+            tokenizer_class = tokenizer_class_from_name(tokenizer_class_candidate)
+            if tokenizer_class is None:
+                return (
+                    False,
+                    f"uses {tokenizer_class_candidate}, which is not in a transformers release, therefore not supported at the moment.",
+                    None
+                )
         return True, None, config
 
     except ValueError:
@@ -48,7 +64,7 @@ def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_rem
             None
         )
 
-    except Exception:
+    except Exception as e:
         return False, "was not found on hub!", None
 
 
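The rejection path relies on the two transformers helpers imported above: get_tokenizer_config reads the repo's tokenizer_config.json, and tokenizer_class_from_name only resolves class names that ship in a transformers release. A minimal sketch of that behavior (illustrative only; the class names below are examples, not taken from this commit):

from transformers.models.auto.tokenization_auto import tokenizer_class_from_name

# A tokenizer class shipped with transformers resolves to an actual class.
print(tokenizer_class_from_name("GPT2Tokenizer"))       # <class '...GPT2Tokenizer'>
# A custom or unreleased class name resolves to None, so is_model_on_hub
# returns the "not in a transformers release" error and the submission is rejected.
print(tokenizer_class_from_name("MyCustomTokenizer"))   # None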
src/submission/submit.py
CHANGED
@@ -54,12 +54,12 @@ def add_new_eval(
 
     # Is the model on the hub?
     if weight_type in ["Delta", "Adapter"]:
-        base_model_on_hub, error, _ = is_model_on_hub(base_model, revision, H4_TOKEN)
+        base_model_on_hub, error, _ = is_model_on_hub(model_name=base_model, revision=revision, token=H4_TOKEN, test_tokenizer=True)
         if not base_model_on_hub:
             return styled_error(f'Base model "{base_model}" {error}')
 
     if not weight_type == "Adapter":
-        model_on_hub, error, _ = is_model_on_hub(model, revision)
+        model_on_hub, error, _ = is_model_on_hub(model_name=model, revision=revision, test_tokenizer=True)
         if not model_on_hub:
             return styled_error(f'Model "{model}" {error}')
 
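For reference, a sketch of how the updated helper would be called outside of add_new_eval, mirroring the call sites above. The repo id is a placeholder, and the import path is an assumption based on the file location shown in this commit:

from src.submission.check_validity import is_model_on_hub  # assumed import path

# Placeholder repo id, illustrative only.
on_hub, error, config = is_model_on_hub(
    model_name="org/model-name",
    revision="main",
    test_tokenizer=True,  # flag introduced in this commit
)
if not on_hub:
    # error reads e.g. 'uses <TokenizerClass>, which is not in a transformers release, ...'
    print(f'Model "org/model-name" {error}')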