Commit dd937a5
Parent(s): 6797f10
Refactor model ID handling and update requirements.txt
app.py CHANGED

@@ -1,6 +1,3 @@
-import os
-os.environ["CUDA_LAUNCH_BLOCKING"]="1"
-
 import spaces
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 import torch
@@ -13,9 +10,7 @@ models_available = [
     "MohamedRashad/Arabic-Orpo-Llama-3-8B-Instruct",
     "silma-ai/SILMA-9B-Instruct-v1.0",
     "inceptionai/jais-adapted-7b-chat",
-    # "inceptionai/jais-adapted-13b-chat",
     "inceptionai/jais-family-6p7b-chat",
-    # "inceptionai/jais-family-13b-chat",
     "NousResearch/Meta-Llama-3.1-8B-Instruct",
     # "unsloth/gemma-2-9b-it",
     "NousResearch/Meta-Llama-3-8B-Instruct",
@@ -25,7 +20,6 @@ tokenizer_a, model_a = None, None
 tokenizer_b, model_b = None, None
 torch_dtype = torch.bfloat16
 attn_implementation = "flash_attention_2"
-# attn_implementation = None
 
 def load_model_a(model_id):
     global tokenizer_a, model_a
@@ -40,8 +34,9 @@ def load_model_a(model_id):
             attn_implementation=attn_implementation,
             trust_remote_code=True,
         ).eval()
-    except:
+    except Exception as e:
         print(f"Using default attention implementation in {model_id}")
+        print(f"Error: {e}")
         model_a = AutoModelForCausalLM.from_pretrained(
             model_id,
             torch_dtype=torch_dtype,
@@ -64,7 +59,8 @@ def load_model_b(model_id):
             attn_implementation=attn_implementation,
             trust_remote_code=True,
         ).eval()
-    except:
+    except Exception as e:
+        print(f"Error: {e}")
         print(f"Using default attention implementation in {model_id}")
         model_b = AutoModelForCausalLM.from_pretrained(
             model_id,
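For context, the net effect on the model-loading helpers is roughly the sketch below, reconstructed only from the hunks above: try flash_attention_2 first, and on failure log the exception and retry with the default attention implementation. The tokenizer line, the exact kwargs of the fallback call, and the absence of a return value are assumptions not visible in this diff.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer_a, model_a = None, None
torch_dtype = torch.bfloat16
attn_implementation = "flash_attention_2"

def load_model_a(model_id):
    global tokenizer_a, model_a
    # Assumption: the tokenizer is loaded alongside the model (not shown in the hunks).
    tokenizer_a = AutoTokenizer.from_pretrained(model_id)
    try:
        # First attempt: load with flash_attention_2.
        model_a = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch_dtype,
            attn_implementation=attn_implementation,
            trust_remote_code=True,
        ).eval()
    except Exception as e:
        # Changed in this commit: the bare `except:` now binds the exception
        # and logs it before falling back to the default attention implementation.
        print(f"Using default attention implementation in {model_id}")
        print(f"Error: {e}")
        model_a = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch_dtype,
            trust_remote_code=True,  # assumption: same kwargs minus attn_implementation
        ).eval()

load_model_b follows the same pattern for tokenizer_b and model_b, with the two changed lines ordered slightly differently (the error is printed before the fallback notice).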