gdnartea committed on
Commit
6ca34cf
·
verified ·
1 Parent(s): 8b2b696

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -25
app.py CHANGED
@@ -1,37 +1,43 @@
1
- # microsoft/Phi-3-mini-4k-instruct-gguf
 
 
2
 
3
- from llama_cpp import Llama
4
 
5
-
6
- llm = Llama(
7
- model_path="microsoft/Phi-3-mini-4k-instruct-q4.gguf", # path to GGUF file
8
- n_ctx=2048, # The max sequence length to use - note that longer sequence lengths require much more resources
9
- n_threads=8, # The number of CPU threads to use, tailor to your system and the resulting performance
10
- n_gpu_layers=0, # The number of layers to offload to GPU, if you have GPU acceleration available. Set to 0 if no GPU acceleration is available on your system.
 
 
 
 
 
 
 
 
 
 
 
 
11
  )
12
 
13
- prompt = "How to explain Internet to a medieval knight?"
14
-
15
- def process_text(prompt):
16
-
17
- # Simple inference example
18
- output = llm(
19
- f"<|user|>\n{prompt}<|end|>\n<|assistant|>",
20
- max_tokens=256, # Generate up to 256 tokens
21
- stop=["<|end|>"],
22
- echo=True, # Whether to echo the prompt
23
- )
24
-
25
- return (output['choices'][0]['text'])
26
-
27
-
28
-
29
 
 
 
30
 
31
 
32
  # Create a Gradio interface
33
  iface = gr.Interface(
34
- fn=process_text,
35
  inputs=gr.Textbox(lines=5, placeholder="Enter your prompt here..."),
36
  outputs=gr.Textbox()
37
  )
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
4
 
5
+ torch.random.manual_seed(0)
6
 
7
+ model = AutoModelForCausalLM.from_pretrained(
8
+ "microsoft/Phi-3-mini-4k-instruct",
9
+ device_map="cuda",
10
+ torch_dtype="auto",
11
+ trust_remote_code=True,
12
+ )
13
+ tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
14
+
15
+ messages = [
16
+ {"role": "user", "content": "Can you provide ways to eat combinations of bananas and dragonfruits?"},
17
+ {"role": "assistant", "content": "Sure! Here are some ways to eat bananas and dragonfruits together: 1. Banana and dragonfruit smoothie: Blend bananas and dragonfruits together with some milk and honey. 2. Banana and dragonfruit salad: Mix sliced bananas and dragonfruits together with some lemon juice and honey."},
18
+ {"role": "user", "content": "What about solving an 2x + 3 = 7 equation?"},
19
+ ]
20
+
21
+ proc_pipe = pipeline(
22
+ "text-generation",
23
+ model=model,
24
+ tokenizer=tokenizer,
25
  )
26
 
27
+ generation_args = {
28
+ "max_new_tokens": 500,
29
+ "return_full_text": False,
30
+ "temperature": 0.0,
31
+ "do_sample": False,
32
+ }
 
 
 
 
 
 
 
 
 
 
33
 
34
+ output = pipe(messages, **generation_args)
35
+ print(output[0]['generated_text'])
36
 
37
 
38
  # Create a Gradio interface
39
  iface = gr.Interface(
40
+ fn=proc_pipe,
41
  inputs=gr.Textbox(lines=5, placeholder="Enter your prompt here..."),
42
  outputs=gr.Textbox()
43
  )