vicuna-7b-gguf-api

Runtime error

awinml committed on Jun 29, 2023

Commit

c4789a1

1 Parent(s): 3d309a7

Upload 2 files (#1)

- Upload 2 files (f73fee72cf55ceb109da3d3a719ef19a37a8c41f)

Files changed (2) hide show

app.py ADDED Viewed

+import os
+import urllib.request
+import gradio as gr
+from llama_cpp import Llama
+def download_file(file_link, filename):
+    # Checks if the file already exists before downloading
+    if not os.path.isfile(filename):
+        urllib.request.urlretrieve(file_link, filename)
+        print("File downloaded successfully.")
+    else:
+        print("File already exists.")
+# Dowloading GGML model from HuggingFace
+ggml_model_path = "https://huggingface.co/CRD716/ggml-vicuna-1.1-quantized/resolve/main/ggml-vicuna-7b-1.1-q4_1.bin"
+filename = "ggml-vicuna-7b-1.1-q4_1.bin"
+download_file(ggml_model_path, filename)
+llm = Llama(model_path=filename, n_ctx=512, n_batch=126)
+def generate_text(prompt):
+    output = llm(prompt, max_tokens=256, temperature=0.1, top_p=0.5, echo=False, stop=["#"])
+    output_text = output['choices'][0]['text']
+    return output_text
+description = "Vicuna-7B"
+examples = [
+    ["What is the capital of France? ", "The capital of France is Paris."],
+    ["Who wrote the novel 'Pride and Prejudice'?", "The novel 'Pride and Prejudice' was written by Jane Austen."],
+    ["What is the square root of 64?", "The square root of 64 is 8."]
+]
+gradio_interface = gr.Interface(
+  fn=generate_text,
+  inputs="text",
+  outputs="text",
+  examples=examples
+  title="Vicuna-7B",
+)
+gradio_interface.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ llama-cpp-python==0.1.62