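# app.py: minimal Gradio demo that streams chat completions from a local
# Phi-3.5-mini GGUF model loaded with llama-cpp-python.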
import gradio as gr
from llama_cpp import Llama
# Download the quantized model from the Hugging Face Hub and load it on CPU.
llm = Llama.from_pretrained(
    repo_id="bartowski/Phi-3.5-mini-instruct-GGUF",
    filename="Phi-3.5-mini-instruct-Q4_K_M.gguf",
    numa=True,        # enable NUMA optimizations
    use_mmap=False,   # read the whole model into RAM instead of memory-mapping it
    use_mlock=True,   # lock model memory to keep it from being swapped out
    seed=-1,          # use a random seed on each run
    # flash_attn=True,
    # n_gpu_layers=-1,
    n_batch=1024,     # prompt-processing batch size
    n_ctx=4095,       # context window size in tokens
)
def respond(prompt: str):
    """Stream the model's reply, yielding the accumulated text for Gradio."""
    stream = llm.create_chat_completion(
        stream=True,
        messages=[{"role": "user", "content": prompt}],
    )
    response = ""
    for chunk in stream:
        delta = chunk["choices"][0]["delta"]
        if "content" in delta:
            response += delta["content"]
            yield response
# Simple text-in / text-out UI; the input box is pre-filled with an example prompt.
demo = gr.Interface(
    fn=respond,
    inputs=[gr.TextArea("What is the capital of France?")],
    outputs=[gr.TextArea()],
)
demo.launch(server_name="0.0.0.0", server_port=7860)