Spaces:

AFischer1985
/

GGUF-Interface

Paused

AFischer1985 committed on Dec 21, 2023

Commit

dc2c4dd

1 Parent(s): da994cb

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -10,21 +10,18 @@ response = requests.get(url)
 with open("./model.gguf", mode="wb") as file:
   file.write(response.content)
 llm = Llama(model_path="./model.gguf")
-def generate_text(input_text):
     output = llm(f"Q: {input_text} A:", max_tokens=256, stop=["Q:", "\n"], echo=True)
     return output['choices'][0]['text']
-input_text = gr.inputs.Textbox(lines= 10, label="Enter your input text")
-output_text = gr.outputs.Textbox(label="Output text")
-description = "llama.cpp implementation in python [https://github.com/abetlen/llama-cpp-python]"
-examples = [
-    ["What is the capital of France? ", "The capital of France is Paris."],
-    ["Who wrote the novel 'Pride and Prejudice'?", "The novel 'Pride and Prejudice' was written by Jane Austen."],
-    ["What is the square root of 64?", "The square root of 64 is 8."]
-]
-gr.Interface(fn=generate_text, inputs=input_text, outputs=output_text, title="Llama Language Model", description=description, examples=examples).launch()

 with open("./model.gguf", mode="wb") as file:
   file.write(response.content)
+app = create_app(
+    Settings(
+        n_threads=2,  # set to number of cpu cores
+        model="model/gguf-model.bin",
+        embedding=False
+    )
+)
 llm = Llama(model_path="./model.gguf")
+def response(input_text, history):
     output = llm(f"Q: {input_text} A:", max_tokens=256, stop=["Q:", "\n"], echo=True)
     return output['choices'][0]['text']
+gr.ChatInterface(response).queue().launch(share=False, server_name="0.0.0.0", server_port=7864)