AFischer1985 committed
Commit dc2c4dd · 1 Parent(s): da994cb

Update app.py

Files changed (1):
  1. app.py +10 -13
app.py CHANGED
@@ -10,21 +10,18 @@ response = requests.get(url)
 with open("./model.gguf", mode="wb") as file:
     file.write(response.content)
 
+app = create_app(
+    Settings(
+        n_threads=2, # set to number of cpu cores
+        model="model/gguf-model.bin",
+        embedding=False
+    )
+)
+
 llm = Llama(model_path="./model.gguf")
-def generate_text(input_text):
+def response(input_text, history):
     output = llm(f"Q: {input_text} A:", max_tokens=256, stop=["Q:", "\n"], echo=True)
     return output['choices'][0]['text']
 
-input_text = gr.inputs.Textbox(lines= 10, label="Enter your input text")
-output_text = gr.outputs.Textbox(label="Output text")
-
-description = "llama.cpp implementation in python [https://github.com/abetlen/llama-cpp-python]"
-
-examples = [
-    ["What is the capital of France? ", "The capital of France is Paris."],
-    ["Who wrote the novel 'Pride and Prejudice'?", "The novel 'Pride and Prejudice' was written by Jane Austen."],
-    ["What is the square root of 64?", "The square root of 64 is 8."]
-]
-
-gr.Interface(fn=generate_text, inputs=input_text, outputs=output_text, title="Llama Language Model", description=description, examples=examples).launch()
+gr.ChatInterface(response).queue().launch(share=False, server_name="0.0.0.0", server_port=7864)
 
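
For reference, a sketch of app.py as it stands after this commit. The import block and the model URL sit above the diff hunk and are not shown there, so the first lines below are assumptions; everything from requests.get(url) down is taken from the diff itself.

# Sketch of the post-commit app.py. The imports and the URL are assumptions
# (they live above the @@ hunk); the rest mirrors the diff as committed.
import requests
import gradio as gr
from llama_cpp import Llama
from llama_cpp.server.app import create_app, Settings  # assumed import path

url = "https://example.com/model.gguf"  # placeholder; the real URL is defined above the hunk
response = requests.get(url)
with open("./model.gguf", mode="wb") as file:
    file.write(response.content)

# OpenAI-compatible FastAPI app from llama-cpp-python's server module.
# Note: Settings points at "model/gguf-model.bin", not at the file just
# downloaded to "./model.gguf", and `app` is never served in this script.
app = create_app(
    Settings(
        n_threads=2,  # set to number of cpu cores
        model="model/gguf-model.bin",
        embedding=False
    )
)

llm = Llama(model_path="./model.gguf")

# gr.ChatInterface calls this with (message, history); note the function also
# shadows the `response` object returned by requests.get() above.
def response(input_text, history):
    output = llm(f"Q: {input_text} A:", max_tokens=256, stop=["Q:", "\n"], echo=True)
    return output['choices'][0]['text']

gr.ChatInterface(response).queue().launch(share=False, server_name="0.0.0.0", server_port=7864)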
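
The app object built by create_app is a FastAPI application exposing llama-cpp-python's OpenAI-compatible endpoints, but this script constructs it without ever serving it. A minimal sketch of serving it, assuming uvicorn is installed and pointing Settings at the file the script actually downloads; port 8000 is an arbitrary choice here:

# Hedged sketch: serve the OpenAI-compatible server. Assumes uvicorn is
# installed; "./model.gguf" is the path the download step above writes to.
import uvicorn
from llama_cpp.server.app import create_app, Settings

app = create_app(Settings(n_threads=2, model="./model.gguf", embedding=False))
uvicorn.run(app, host="0.0.0.0", port=8000)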