code
Browse files
app.py
CHANGED
@@ -3,15 +3,6 @@ import gradio as gr
|
|
3 |
from llama_cpp import Llama
|
4 |
import os
|
5 |
|
6 |
-
# 初始化LLM
|
7 |
-
llm = Llama.from_pretrained(
|
8 |
-
repo_id="matteogeniaccio/phi-4",
|
9 |
-
filename="phi-4-Q4_K_M.gguf",
|
10 |
-
verbose=True,
|
11 |
-
main_gpu=0,
|
12 |
-
n_gpu_layers=-1
|
13 |
-
)
|
14 |
-
|
15 |
# 响应函数
|
16 |
@spaces.GPU
|
17 |
def respond(
|
@@ -31,6 +22,13 @@ def respond(
|
|
31 |
messages.append({"role": "assistant", "content": assistant_msg})
|
32 |
messages.append({"role": "user", "content": message})
|
33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
# 使用llama-cpp-python的方式生成响应
|
35 |
response = llm.create_chat_completion(
|
36 |
messages=messages,
|
|
|
3 |
from llama_cpp import Llama
|
4 |
import os
|
5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
# 响应函数
|
7 |
@spaces.GPU
|
8 |
def respond(
|
|
|
22 |
messages.append({"role": "assistant", "content": assistant_msg})
|
23 |
messages.append({"role": "user", "content": message})
|
24 |
|
25 |
+
llm = Llama.from_pretrained(
|
26 |
+
repo_id="matteogeniaccio/phi-4",
|
27 |
+
filename="phi-4-Q4_K_M.gguf",
|
28 |
+
verbose=True,
|
29 |
+
main_gpu=0,
|
30 |
+
n_gpu_layers=-1
|
31 |
+
)
|
32 |
# 使用llama-cpp-python的方式生成响应
|
33 |
response = llm.create_chat_completion(
|
34 |
messages=messages,
|