gdnartea committed on
Commit 0d9b453 · verified · 1 Parent(s): dedd577

Update app.py

Files changed (1)
  1. app.py +29 -9
app.py CHANGED
@@ -1,17 +1,37 @@
- from transformers import pipeline
- import gradio as gr
-
- # Initialize the text generation pipeline
- generator = pipeline('text-generation', model='microsoft/Phi-3-mini-4k-instruct-gguf')
-
- def generate_text(prompt):
-     # Generate text
-     output = generator(prompt, max_length=100)
-     return output[0]['generated_text']
+ # microsoft/Phi-3-mini-4k-instruct-gguf
+
+ from llama_cpp import Llama
+
+
+ llm = Llama(
+     model_path="./Phi-3-mini-4k-instruct-q4.gguf",  # path to the GGUF file
+     n_ctx=2048,        # max sequence length; longer contexts require much more resources
+     n_threads=8,       # number of CPU threads; tailor to your system
+     n_gpu_layers=0,    # layers to offload to GPU; set to 0 if no GPU acceleration is available
+ )
+
+ prompt = "How to explain Internet to a medieval knight?"
+
+ def process_text(prompt):
+     # Simple inference example
+     output = llm(
+         f"<|user|>\n{prompt}<|end|>\n<|assistant|>",
+         max_tokens=256,     # generate up to 256 tokens
+         stop=["<|end|>"],   # stop at the end-of-turn token
+         echo=True,          # whether to echo the prompt in the output
+     )
+     return output['choices'][0]['text']
+
 
  # Create a Gradio interface
  iface = gr.Interface(
-     fn=generate_text,
+     fn=process_text,
      inputs=gr.Textbox(lines=5, placeholder="Enter your prompt here..."),
      outputs=gr.Textbox()
  )
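
Note on the resulting file: this hunk drops `import gradio as gr` without re-adding it, so the unchanged `gr.Interface(...)` call would raise a NameError, and no `launch()` call appears anywhere in the hunk. Below is a minimal sketch of what the consolidated app.py might look like after the commit. The restored gradio import, the `echo=False` setting (so the textbox shows only the model's reply rather than the templated prompt), the omission of the unused module-level `prompt` assignment, and the final `iface.launch()` are assumptions, not part of the commit. The `<|user|>...<|end|><|assistant|>` wrapper is the Phi-3 chat template the model expects.

# Sketch: consolidated app.py after this commit (assumptions marked)
from llama_cpp import Llama
import gradio as gr  # assumed: re-added, since gr.Interface is still used below

llm = Llama(
    model_path="./Phi-3-mini-4k-instruct-q4.gguf",  # GGUF file from the model repo
    n_ctx=2048,      # context window
    n_threads=8,     # CPU threads
    n_gpu_layers=0,  # 0 = pure CPU inference
)

def process_text(prompt):
    # Wrap the prompt in the Phi-3 chat template and generate a completion
    output = llm(
        f"<|user|>\n{prompt}<|end|>\n<|assistant|>",
        max_tokens=256,
        stop=["<|end|>"],  # cut generation at the end-of-turn token
        echo=False,        # assumed: return only the reply, not the prompt
    )
    return output['choices'][0]['text']

iface = gr.Interface(
    fn=process_text,
    inputs=gr.Textbox(lines=5, placeholder="Enter your prompt here..."),
    outputs=gr.Textbox(),
)

iface.launch()  # assumed: needed so the Space actually serves the UI

The switch itself is sound: the microsoft/Phi-3-mini-4k-instruct-gguf repo ships GGUF weights, which transformers' pipeline('text-generation', ...) presumably cannot load directly, while llama-cpp-python reads GGUF natively on CPU.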