import spaces
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import os
from safetensors.torch import load_file, save_file

# Download the phi-4 checkpoint if it is not already present locally.
if not os.path.exists("./phi-4"):
    os.system('huggingface-cli download matteogeniaccio/phi-4 --local-dir ./phi-4 --include "phi-4/*"')
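# A pure-Python alternative to shelling out to huggingface-cli is the hub client's
# snapshot_download. This is only a sketch of an equivalent call, not what the
# script actually runs:
#   from huggingface_hub import snapshot_download
#   snapshot_download(repo_id="matteogeniaccio/phi-4", local_dir="./phi-4",
#                     allow_patterns=["phi-4/*"])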
@spaces.GPU
def merge_safetensors(input_dir, output_file):
    # Collect all shard files
    files = sorted([f for f in os.listdir(input_dir) if f.startswith('model-') and f.endswith('.safetensors')])
    # Merge all tensors into a single state dict
    merged_state_dict = {}
    for file in files:
        file_path = os.path.join(input_dir, file)
        print(f"Loading {file}...")
        state_dict = load_file(file_path)
        merged_state_dict.update(state_dict)
    # Save the merged file
    print(f"Saving merged model to {output_file}...")
    save_file(merged_state_dict, output_file)
    print("Done!")

# Usage
input_dir = "./phi-4/phi-4"  # directory containing the shard files
output_file = "./phi-4/phi-4/model.safetensors"  # path of the merged file
if not os.path.exists(output_file):
    merge_safetensors(input_dir, output_file)
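# Note: if the downloaded snapshot also contains a model.safetensors.index.json
# (an assumption about this particular checkpoint), transformers can load the
# sharded safetensors files directly and the manual merge above is not required:
#   model = AutoModelForCausalLM.from_pretrained("./phi-4/phi-4", use_safetensors=True)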
# Load the phi-4 model and tokenizer
torch.random.manual_seed(0)
model = AutoModelForCausalLM.from_pretrained(
    "./phi-4/phi-4",          # local model path
    device_map="cuda",        # run on the GPU
    # torch_dtype="auto",     # alternatively, pick the dtype automatically
    trust_remote_code=True,   # allow custom modeling code from the checkpoint
    use_safetensors=True,
    torch_dtype=torch.float16,
)
tokenizer = AutoTokenizer.from_pretrained("./phi-4/phi-4")

# Set up the text-generation pipeline
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)
# Response function
@spaces.GPU
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    # Build the message list from the system prompt and the chat history
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})
    # Flatten the messages into a plain-text prompt (for text-generation)
    input_text = "\n".join(
        f"{msg['role']}: {msg['content']}" for msg in messages
    )
    # Generate the response
    generation_args = {
        "max_new_tokens": max_tokens,
        "temperature": temperature,
        "top_p": top_p,
        "do_sample": temperature > 0,
        "return_full_text": False,
    }
    output = pipe(input_text, **generation_args)
    response = output[0]["generated_text"]
    # Stream the response: yield the accumulated text so far, since ChatInterface
    # replaces the displayed message with each yielded value
    partial = ""
    for token in response:
        partial += token
        yield partial
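# Sketch of an alternative prompt construction for respond() using the tokenizer's
# chat template, assuming this phi-4 tokenizer ships one (recent transformers
# pipelines also accept the messages list directly):
#   prompt = tokenizer.apply_chat_template(
#       messages, tokenize=False, add_generation_prompt=True
#   )
#   output = pipe(prompt, **generation_args)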
# Gradio interface
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"
        ),
    ],
)

if __name__ == "__main__":
    demo.launch()
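# Once the app is running, it can also be queried programmatically with
# gradio_client. The "/chat" endpoint name is the ChatInterface default and is an
# assumption here; check the app's "Use via API" page for the exact signature:
#   from gradio_client import Client
#   client = Client("http://127.0.0.1:7860")
#   reply = client.predict("Hello!", "You are a friendly Chatbot.", 512, 0.7, 0.95,
#                          api_name="/chat")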