likewendy committed on
Commit
54f79ea
·
1 Parent(s): a58d727
Files changed (1) hide show
  1. app.py +7 -9
app.py CHANGED
@@ -3,15 +3,6 @@ import gradio as gr
3
  from llama_cpp import Llama
4
  import os
5
 
6
- # 初始化LLM
7
- llm = Llama.from_pretrained(
8
- repo_id="matteogeniaccio/phi-4",
9
- filename="phi-4-Q4_K_M.gguf",
10
- verbose=True,
11
- main_gpu=0,
12
- n_gpu_layers=-1
13
- )
14
-
15
  # 响应函数
16
  @spaces.GPU
17
  def respond(
@@ -31,6 +22,13 @@ def respond(
31
  messages.append({"role": "assistant", "content": assistant_msg})
32
  messages.append({"role": "user", "content": message})
33
 
 
 
 
 
 
 
 
34
  # 使用llama-cpp-python的方式生成响应
35
  response = llm.create_chat_completion(
36
  messages=messages,
 
3
  from llama_cpp import Llama
4
  import os
5
 
 
 
 
 
 
 
 
 
 
6
  # 响应函数
7
  @spaces.GPU
8
  def respond(
 
22
  messages.append({"role": "assistant", "content": assistant_msg})
23
  messages.append({"role": "user", "content": message})
24
 
25
+ llm = Llama.from_pretrained(
26
+ repo_id="matteogeniaccio/phi-4",
27
+ filename="phi-4-Q4_K_M.gguf",
28
+ verbose=True,
29
+ main_gpu=0,
30
+ n_gpu_layers=-1
31
+ )
32
  # 使用llama-cpp-python的方式生成响应
33
  response = llm.create_chat_completion(
34
  messages=messages,