asimsultan commited on
Commit
838377c
·
verified ·
1 Parent(s): 6df679f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -15
app.py CHANGED
@@ -1,20 +1,12 @@
1
- from transformers import AutoTokenizer, AutoModelForCausalLM
2
- from vllm import LLM, SamplingParams
3
  import gradio as gr
 
4
 
5
- model_name = "aws-prototyping/MegaBeam-Mistral-7B-512k"
6
 
7
- tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
8
- model = AutoModelForCausalLM.from_pretrained(
9
- model_name,
10
- trust_remote_code=True
11
- )
12
 
13
- def chat(prompt: str):
14
- llm = LLM(model=model_name)
15
- sampling = SamplingParams(temperature=0.7, max_tokens=512)
16
- outputs = llm.generate([prompt], sampling)
17
- return outputs[0].outputs[0].text
18
 
19
- iface = gr.Interface(fn=chat, inputs="text", outputs="text")
20
- iface.launch()
 
 
 
1
  import gradio as gr
2
+ from llama_cpp import Llama
3
 
4
import os

# Path to the GGUF weights; the file is downloaded in advance, before the
# app starts (it is not fetched here).
MODEL_PATH = "model.gguf"

# Fail fast with a clear error instead of a cryptic native loader failure
# deep inside llama.cpp when the weights file is missing.
if not os.path.exists(MODEL_PATH):
    raise FileNotFoundError(
        f"GGUF model file not found at {MODEL_PATH!r}; download it before launching the app."
    )

# Load the model once at startup: 8192-token context window, 4 CPU threads.
# NOTE(review): n_ctx=8192 is far below the model's advertised 512K context —
# presumably capped for host memory; confirm against deployment hardware.
llm = Llama(model_path=MODEL_PATH, n_ctx=8192, n_threads=4)
 
 
 
 
7
 
8
def chat(prompt):
    """Run one completion for *prompt* against the module-level llama.cpp model.

    Sampling is fixed: at most 512 new tokens at temperature 0.7.
    Returns the generated text of the first choice.
    """
    completion = llm(prompt, max_tokens=512, temperature=0.7)
    first_choice = completion["choices"][0]
    return first_choice["text"]
 
 
11
 
12
# Minimal text-in / text-out web UI wired to chat(); launch() serves the app.
demo = gr.Interface(
    fn=chat,
    inputs="text",
    outputs="text",
    title="MegaBeam Mistral 512K - GGUF",
)
demo.launch()