likewendy committed
Commit 07f9f12 · 1 Parent(s): b9692bd
Files changed (1)
  1. app.py (+14 -62)
app.py CHANGED
@@ -1,56 +1,14 @@
  import spaces
  import gradio as gr
- import torch
- from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+ import transformers
  import os

- if not os.path.exists("./phi-4"):
-     os.system('huggingface-cli download matteogeniaccio/phi-4 --local-dir ./phi-4 --include "phi-4/*"')
-
- from safetensors.torch import load_file, save_file
-
- @spaces.GPU
- def merge_safetensors(input_dir, output_file):
-     # Collect all shard files
-     files = sorted([f for f in os.listdir(input_dir) if f.startswith('model-') and f.endswith('.safetensors')])
-
-     # Merge all tensors
-     merged_state_dict = {}
-     for file in files:
-         file_path = os.path.join(input_dir, file)
-         print(f"Loading {file}...")
-         state_dict = load_file(file_path)
-         merged_state_dict.update(state_dict)
-
-     # Save the merged file
-     print(f"Saving merged model to {output_file}...")
-     save_file(merged_state_dict, output_file)
-     print("Done!")
-
- # Usage example
- input_dir = "./phi-4/phi-4"  # directory containing the shard files
- output_file = "./phi-4/phi-4/model.safetensors"  # path of the merged file
-
- if not os.path.exists(output_file):
-     merge_safetensors(input_dir, output_file)
-
- # Load the phi-4 model and tokenizer
- torch.random.manual_seed(0)
-
- model = AutoModelForCausalLM.from_pretrained(
-     "./phi-4/phi-4",          # model path
-     device_map="cuda",        # use the GPU
-     torch_dtype="auto",       # choose the dtype automatically
-     trust_remote_code=True,   # allow loading remote code
- )
- tokenizer = AutoTokenizer.from_pretrained("./phi-4/phi-4")
-
- # Set up the pipeline
-
- pipe = pipeline(
+ # Initialize the pipeline
+ pipeline = transformers.pipeline(
      "text-generation",
-     model=model,
-     tokenizer=tokenizer,
+     model="microsoft/phi-4",
+     model_kwargs={"torch_dtype": "auto"},
+     device_map="auto",
  )

  # Response function
@@ -72,21 +30,15 @@ def respond(
      messages.append({"role": "assistant", "content": assistant_msg})
      messages.append({"role": "user", "content": message})

-     # Convert the messages to a single string (for text-generation)
-     input_text = "\n".join(
-         f"{msg['role']}: {msg['content']}" for msg in messages
-     )
-
      # Generate the response
-     generation_args = {
-         "max_new_tokens": max_tokens,
-         "temperature": temperature,
-         "top_p": top_p,
-         "do_sample": temperature > 0,
-         "return_full_text": False,
-     }
-     output = pipe(input_text, **generation_args)
-     response = output[0]["generated_text"]
+     outputs = pipeline(
+         messages,
+         max_new_tokens=max_tokens,
+         temperature=temperature,
+         top_p=top_p,
+         do_sample=(temperature > 0),
+     )
+     response = outputs[0]["generated_text"]

      # Return the response as a stream
      for token in response:
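Note on the deleted merge step: from_pretrained can load a sharded safetensors checkpoint directly through its model.safetensors.index.json, so merging the model-*.safetensors shards by hand was never required. A minimal sketch of the direct load, assuming the same local ./phi-4/phi-4 layout the old code used:

from transformers import AutoModelForCausalLM, AutoTokenizer

# The shards are resolved through model.safetensors.index.json
# automatically; no manual merge pass is needed.
model = AutoModelForCausalLM.from_pretrained(
    "./phi-4/phi-4",     # directory holding model-*.safetensors and the index
    torch_dtype="auto",  # keep the checkpoint's dtype
    device_map="auto",   # place weights on the available device(s)
)
tokenizer = AutoTokenizer.from_pretrained("./phi-4/phi-4")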
 
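One subtlety in the new code: when a chat-style messages list is passed to a transformers text-generation pipeline, recent versions return generated_text as the updated message list rather than a plain string, so the assistant turn has to be extracted before it can be streamed character by character. A minimal sketch of the tail of respond() under that assumption (the accumulate-and-yield pattern is an assumption for a Gradio streaming generator, not part of this commit):

outputs = pipeline(
    messages,
    max_new_tokens=max_tokens,
    temperature=temperature,
    top_p=top_p,
    do_sample=(temperature > 0),
)
# With chat input, generated_text holds the whole conversation;
# the last entry is the newly generated assistant message.
response = outputs[0]["generated_text"][-1]["content"]

# Pseudo-streaming: yield a growing prefix one character at a time
# so the Gradio chat window re-renders the partial reply.
partial = ""
for token in response:
    partial += token
    yield partial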