likewendy committed on
Commit
82f01bf
·
1 Parent(s): bfcb3c6
Files changed (1) hide show
  1. app.py +16 -21
app.py CHANGED
@@ -1,20 +1,15 @@
1
  import spaces
2
  import gradio as gr
3
- import torch
4
- from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
5
  import os
6
 
7
- if not os.path.exists("./phi-4"):
8
- os.system('huggingface-cli download matteogeniaccio/phi-4 --local-dir ./phi-4 --include "phi-4/*"')
9
-
10
- import transformers
11
-
12
- # 初始化pipeline
13
- pipeline = transformers.pipeline(
14
- "text-generation",
15
- model="./phi-4/phi-4",
16
- model_kwargs={"torch_dtype": "auto"},
17
- device_map="auto",
18
  )
19
 
20
  # 响应函数
@@ -36,19 +31,19 @@ def respond(
36
  messages.append({"role": "assistant", "content": assistant_msg})
37
  messages.append({"role": "user", "content": message})
38
 
39
- # 生成响应
40
- outputs = pipeline(
41
- messages,
42
- max_new_tokens=max_tokens,
43
  temperature=temperature,
44
  top_p=top_p,
45
- do_sample=(temperature > 0),
46
  )
47
- response = outputs[0]["generated_text"]
48
 
49
  # 返回流式响应
50
- for token in response:
51
- yield token
 
52
 
53
  # Gradio 界面
54
  demo = gr.ChatInterface(
 
1
  import spaces
2
  import gradio as gr
3
+ from llama_cpp import Llama
 
4
  import os
5
 
6
+ # 初始化LLM
7
+ llm = Llama.from_pretrained(
8
+ repo_id="matteogeniaccio/phi-4",
9
+ filename="phi-4-Q4_K_M.gguf",
10
+ verbose=True,
11
+ main_gpu=0,
12
+ n_gpu_layers=-1
 
 
 
 
13
  )
14
 
15
  # 响应函数
 
31
  messages.append({"role": "assistant", "content": assistant_msg})
32
  messages.append({"role": "user", "content": message})
33
 
34
+ # 使用llama-cpp-python的方式生成响应
35
+ response = llm.create_chat_completion(
36
+ messages=messages,
37
+ max_tokens=max_tokens,
38
  temperature=temperature,
39
  top_p=top_p,
40
+ stream=True # 启用流式输出
41
  )
 
42
 
43
  # 返回流式响应
44
+ for chunk in response:
45
+ if chunk and chunk.get("choices") and chunk["choices"][0].get("delta", {}).get("content"):
46
+ yield chunk["choices"][0]["delta"]["content"]
47
 
48
  # Gradio 界面
49
  demo = gr.ChatInterface(