MohamedRashad committed
Commit dd937a5 · Parent: 6797f10

Refactor model ID handling and update requirements.txt

Files changed (1)
app.py +4 -8
app.py CHANGED
@@ -1,6 +1,3 @@
-import os
-os.environ["CUDA_LAUNCH_BLOCKING"]="1"
-
 import spaces
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 import torch
@@ -13,9 +10,7 @@ models_available = [
     "MohamedRashad/Arabic-Orpo-Llama-3-8B-Instruct",
     "silma-ai/SILMA-9B-Instruct-v1.0",
     "inceptionai/jais-adapted-7b-chat",
-    # "inceptionai/jais-adapted-13b-chat",
     "inceptionai/jais-family-6p7b-chat",
-    # "inceptionai/jais-family-13b-chat",
     "NousResearch/Meta-Llama-3.1-8B-Instruct",
     # "unsloth/gemma-2-9b-it",
     "NousResearch/Meta-Llama-3-8B-Instruct",
@@ -25,7 +20,6 @@ tokenizer_a, model_a = None, None
 tokenizer_b, model_b = None, None
 torch_dtype = torch.bfloat16
 attn_implementation = "flash_attention_2"
-# attn_implementation = None
 
 def load_model_a(model_id):
     global tokenizer_a, model_a
@@ -40,8 +34,9 @@ def load_model_a(model_id):
             attn_implementation=attn_implementation,
             trust_remote_code=True,
         ).eval()
-    except:
+    except Exception as e:
         print(f"Using default attention implementation in {model_id}")
+        print(f"Error: {e}")
         model_a = AutoModelForCausalLM.from_pretrained(
             model_id,
             torch_dtype=torch_dtype,
@@ -64,7 +59,8 @@ def load_model_b(model_id):
             attn_implementation=attn_implementation,
             trust_remote_code=True,
         ).eval()
-    except:
+    except Exception as e:
+        print(f"Error: {e}")
         print(f"Using default attention implementation in {model_id}")
         model_b = AutoModelForCausalLM.from_pretrained(
             model_id,
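
For context, this commit replaces the bare except: clauses with except Exception as e: and prints the failure before retrying, so the fallback to the default attention implementation is no longer silent. A minimal sketch of the resulting loader, reconstructed from the diff context (the tokenizer call and the exact lines outside the hunks are assumptions):

# Sketch of load_model_a after this commit. Reconstructed from the diff
# context above; the tokenizer loading line is an assumption, since it
# falls outside the hunks shown.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer_a, model_a = None, None
torch_dtype = torch.bfloat16
attn_implementation = "flash_attention_2"

def load_model_a(model_id):
    global tokenizer_a, model_a
    tokenizer_a = AutoTokenizer.from_pretrained(model_id)  # assumed
    try:
        # First attempt: load with FlashAttention 2.
        model_a = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch_dtype,
            attn_implementation=attn_implementation,
            trust_remote_code=True,
        ).eval()
    except Exception as e:
        # Fallback: retry without attn_implementation and log the reason,
        # which the old bare except: discarded.
        print(f"Using default attention implementation in {model_id}")
        print(f"Error: {e}")
        model_a = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch_dtype,
            trust_remote_code=True,
        ).eval()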