gdnartea committed on
Commit 0d9b453 · verified · 1 Parent(s): dedd577

Update app.py

Files changed (1)
  1. app.py +29 -9
app.py CHANGED
@@ -1,17 +1,37 @@
- from transformers import pipeline
- import gradio as gr
-
- # Initialize the text generation pipeline
- generator = pipeline('text-generation', model='microsoft/Phi-3-mini-4k-instruct-gguf')
-
- def generate_text(prompt):
-     # Generate text
-     output = generator(prompt, max_length=100)
-     return output[0]['generated_text']
+ # microsoft/Phi-3-mini-4k-instruct-gguf
+
+ from llama_cpp import Llama
+
+
+ llm = Llama(
+     model_path="./Phi-3-mini-4k-instruct-q4.gguf",  # path to the GGUF file
+     n_ctx=2048,        # max sequence length; longer contexts require much more resources
+     n_threads=8,       # number of CPU threads; tailor to your system
+     n_gpu_layers=0,    # layers to offload to GPU; set to 0 if no GPU acceleration is available
+ )
+
+ prompt = "How to explain Internet to a medieval knight?"
+
+ def process_text(prompt):
+     # Simple inference example
+     output = llm(
+         f"<|user|>\n{prompt}<|end|>\n<|assistant|>",
+         max_tokens=256,     # generate up to 256 tokens
+         stop=["<|end|>"],   # stop at the end-of-turn token
+         echo=True,          # whether to echo the prompt in the output
+     )
+     return output['choices'][0]['text']
+
 
  # Create a Gradio interface
  iface = gr.Interface(
-     fn=generate_text,
+     fn=process_text,
      inputs=gr.Textbox(lines=5, placeholder="Enter your prompt here..."),
      outputs=gr.Textbox()
  )
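
Note on the resulting file: this hunk drops `import gradio as gr` without re-adding it, so the unchanged `gr.Interface(...)` call would raise a NameError, and no `launch()` call appears anywhere in the hunk. Below is a minimal sketch of what the consolidated app.py might look like after the commit. The restored gradio import, the `echo=False` setting (so the textbox shows only the model's reply rather than the templated prompt), the omission of the unused module-level `prompt` assignment, and the final `iface.launch()` are assumptions, not part of the commit. The `<|user|>...<|end|><|assistant|>` wrapper is the Phi-3 chat template the model expects.

# Sketch: consolidated app.py after this commit (assumptions marked)
from llama_cpp import Llama
import gradio as gr  # assumed: re-added, since gr.Interface is still used below

llm = Llama(
    model_path="./Phi-3-mini-4k-instruct-q4.gguf",  # GGUF file from the model repo
    n_ctx=2048,      # context window
    n_threads=8,     # CPU threads
    n_gpu_layers=0,  # 0 = pure CPU inference
)

def process_text(prompt):
    # Wrap the prompt in the Phi-3 chat template and generate a completion
    output = llm(
        f"<|user|>\n{prompt}<|end|>\n<|assistant|>",
        max_tokens=256,
        stop=["<|end|>"],  # cut generation at the end-of-turn token
        echo=False,        # assumed: return only the reply, not the prompt
    )
    return output['choices'][0]['text']

iface = gr.Interface(
    fn=process_text,
    inputs=gr.Textbox(lines=5, placeholder="Enter your prompt here..."),
    outputs=gr.Textbox(),
)

iface.launch()  # assumed: needed so the Space actually serves the UI

The switch itself is sound: the microsoft/Phi-3-mini-4k-instruct-gguf repo ships GGUF weights, which transformers' pipeline('text-generation', ...) presumably cannot load directly, while llama-cpp-python reads GGUF natively on CPU.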