Akjava committed
Commit c411706 · verified · 1 Parent(s): 0e10553

Update app.py

Files changed (1): app.py +8 -38
app.py CHANGED
@@ -8,16 +8,14 @@ from threading import Thread
 import gradio as gr
 
 text_generator = None
-is_hugging_face = True
+
 model_id = "AXCXEPT/phi-4-deepseek-R1K-RL-EZO"
 #model_id = "AXCXEPT/phi-4-open-R1-Distill-EZOv1"
 
 huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
-#huggingface_token = None
 device = "auto" # torch.device("cuda" if torch.cuda.is_available() else "cpu")
 device = "cuda"
 dtype = torch.bfloat16
-#dtype = torch.float16
 
 if not huggingface_token:
     pass
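
Note on the hunk above: with `is_hugging_face` removed, the script now unconditionally assumes a CUDA runtime (`device = "cuda"`, `dtype = torch.bfloat16`), and the commented-out `float16` alternative is gone. A runtime-dependent fallback is a common alternative; a minimal sketch, hypothetical and not part of this commit:

    import torch

    # Hypothetical fallback, not in this commit: prefer bfloat16 on CUDA,
    # fall back to float32 on CPU-only runtimes.
    if torch.cuda.is_available():
        device, dtype = "cuda", torch.bfloat16
    else:
        device, dtype = "cpu", torch.float32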
@@ -32,36 +30,14 @@ if not huggingface_token:
 
 
 tokenizer = AutoTokenizer.from_pretrained(model_id, token=huggingface_token)
-print(tokenizer.special_tokens_map)
+#print(tokenizer.special_tokens_map)
 
 # check the special token IDs
-print(tokenizer.eos_token_id)
-print(tokenizer.encode("<|im_end|>", add_special_tokens=False))
+#print(tokenizer.eos_token_id)
+#print(tokenizer.encode("<|im_end|>", add_special_tokens=False))
 
-print(model_id,device,dtype)
+#print(model_id,device,dtype)
 histories = []
-#model = None
-
-
-
-if not is_hugging_face:
-    model = AutoModelForCausalLM.from_pretrained(
-        model_id, token=huggingface_token ,torch_dtype=dtype,device_map=device
-    )
-    text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer,torch_dtype=dtype,device_map=device,stream=True ) #pipeline has not to(device)
-
-    if next(model.parameters()).is_cuda:
-        print("The model is on a GPU")
-    else:
-        print("The model is on a CPU")
-
-    #print(f"text_generator.device='{text_generator.device}")
-    if str(text_generator.device).strip() == 'cuda':
-        print("The pipeline is using a GPU")
-    else:
-        print("The pipeline is using a CPU")
-
-    print("initialized")
 
 
 def generate_text(messages):
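
The hunk above silences the token-ID debug prints and deletes the whole `if not is_hugging_face:` branch, which loaded the model locally via `AutoModelForCausalLM` and a `pipeline`. The commented-out prints verified that `<|im_end|>` (the chat end-of-turn marker this app later strips) matches the tokenizer's eos token; that check can still be run standalone. A minimal sketch, assuming `transformers` is installed and the model repo is accessible:

    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("AXCXEPT/phi-4-deepseek-R1K-RL-EZO")
    # If <|im_end|> encodes to the same id as eos_token_id,
    # generation stops cleanly at the end of each turn.
    print(tokenizer.eos_token_id)
    print(tokenizer.encode("<|im_end|>", add_special_tokens=False))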
@@ -81,18 +57,12 @@ def generate_text(messages):
     generated_output = ""
     thread.start()
     for new_text in streamer:
-        generated_output += new_text.replace("<|im_end|>","")
+        generated_output += new_text.replace("<|im_end|>","")#just replace
         yield generated_output
-    #generate_text.zerogpu = True
 
-
-
+# SDK version is very important in README.md
 @spaces.GPU(duration=120)
-def call_generate_text(message, history):
-    # history.append({"role": "user", "content": message})
-    #print(message)
-    #print(history)
-
+def call_generate_text(message, history):
     messages = history+[{"role":"user","content":message}]
     try:
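
The final hunk keeps the `<|im_end|>`-stripping in the streaming loop (now annotated with `#just replace`) and notes that the Spaces SDK version pinned in README.md matters for the `@spaces.GPU` (ZeroGPU) decorator. For reference, `generate_text` follows the standard `TextIteratorStreamer` pattern; a minimal sketch of that pattern as a hypothetical helper, assuming `model` and `tokenizer` are loaded as in app.py:

    from threading import Thread
    from transformers import TextIteratorStreamer

    def stream_reply(model, tokenizer, messages, max_new_tokens=512):
        # Run generate() on a worker thread and yield growing partial
        # strings as tokens arrive; Gradio re-renders each yielded value.
        streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)
        input_ids = tokenizer.apply_chat_template(
            messages, add_generation_prompt=True, return_tensors="pt"
        ).to(model.device)
        Thread(target=model.generate,
               kwargs={"input_ids": input_ids, "streamer": streamer,
                       "max_new_tokens": max_new_tokens}).start()
        generated_output = ""
        for new_text in streamer:
            generated_output += new_text.replace("<|im_end|>", "")
            yield generated_output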