megabeam-chat / app.py
asimsultan's picture
Update app.py
838377c verified
raw
history blame
382 Bytes
import gradio as gr
from llama_cpp import Llama
# Path to the GGUF model file; assumed to be fetched into the working
# directory before the app starts (e.g. during the Space build step).
MODEL_PATH = "model.gguf" # downloaded in advance
# Load the model once at import time: 8192-token context window, 4 CPU threads.
# NOTE(review): loading here blocks startup until the model is in memory.
llm = Llama(model_path=MODEL_PATH, n_ctx=8192, n_threads=4)
def chat(prompt):
    """Generate a completion for *prompt* with the globally loaded model.

    Runs the llama.cpp model with a 512-token generation cap and
    temperature 0.7, then returns the text of the first choice.
    """
    result = llm(prompt, max_tokens=512, temperature=0.7)
    first_choice = result["choices"][0]
    return first_choice["text"]
gr.Interface(fn=chat, inputs="text", outputs="text", title="MegaBeam Mistral 512K - GGUF").launch()