import spaces
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import os

if not os.path.exists("./phi-4"):
    os.system('huggingface-cli download matteogeniaccio/phi-4 --local-dir ./phi-4 --include "phi-4/*"')
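# Note (a sketch, not part of the original app): the same download could be done
# in-process with the huggingface_hub API instead of the CLI call above:
#   from huggingface_hub import snapshot_download
#   snapshot_download("matteogeniaccio/phi-4", local_dir="./phi-4",
#                     allow_patterns=["phi-4/*"])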

from safetensors.torch import load_file, save_file

@spaces.GPU
def merge_safetensors(input_dir, output_file):
    # Collect all shard files
    files = sorted([f for f in os.listdir(input_dir) if f.startswith('model-') and f.endswith('.safetensors')])
    
    # Merge all tensors into one state dict
    merged_state_dict = {}
    for file in files:
        file_path = os.path.join(input_dir, file)
        print(f"Loading {file}...")
        state_dict = load_file(file_path)
        merged_state_dict.update(state_dict)
    
    # Save the merged file
    print(f"Saving merged model to {output_file}...")
    save_file(merged_state_dict, output_file)
    print("Done!")

# Usage: merge the downloaded shards into a single file
input_dir = "./phi-4/phi-4"  # directory containing the shard files
output_file = "./phi-4/phi-4/model.safetensors"  # path of the merged file

if not os.path.exists(output_file):
    merge_safetensors(input_dir, output_file)

# Load the phi-4 model and tokenizer
torch.random.manual_seed(0)

model = AutoModelForCausalLM.from_pretrained(
    "./phi-4/phi-4",  # 模型路径
    device_map="cuda",  # 使用 GPU
    # torch_dtype="auto",  # 自动选择数据类型
    trust_remote_code=True,  # 允许远程代码加载
    use_safetensors=True,
    torch_dtype=torch.float16,
)
tokenizer = AutoTokenizer.from_pretrained("./phi-4/phi-4")

# Set up the text-generation pipeline
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)

# Response function for the Gradio chat interface
@spaces.GPU
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    # Build the chat message list
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    # Flatten the messages into a plain-text prompt for the text-generation pipeline
    input_text = "\n".join(
        f"{msg['role']}: {msg['content']}" for msg in messages
    )
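    # Alternative (a sketch, assuming the downloaded phi-4 checkpoint ships a
    # chat template): tokenizer.apply_chat_template could build the prompt in
    # the model's native chat format instead of the plain join above:
    #   input_text = tokenizer.apply_chat_template(
    #       messages, tokenize=False, add_generation_prompt=True
    #   )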

    # Generate the response
    generation_args = {
        "max_new_tokens": max_tokens,
        "temperature": temperature,
        "top_p": top_p,
        "do_sample": temperature > 0,
        "return_full_text": False,
    }
    output = pipe(input_text, **generation_args)
    response = output[0]["generated_text"]

    # Stream the reply back; Gradio expects each yield to be the full text shown so far
    partial = ""
    for ch in response:
        partial += ch
        yield partial
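    # Note: this streams text that has already been fully generated. True
    # incremental streaming would typically use transformers.TextIteratorStreamer
    # with model.generate() running in a background thread (not shown here).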

# Gradio interface
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"
        ),
    ],
)

if __name__ == "__main__":
    demo.launch()