gdnartea committed on
Commit
6ca34cf
·
verified ·
1 Parent(s): 8b2b696

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -25
app.py CHANGED
@@ -1,37 +1,43 @@
1
- # microsoft/Phi-3-mini-4k-instruct-gguf
 
 
2
 
3
- from llama_cpp import Llama
4
 
5
-
6
- llm = Llama(
7
- model_path="microsoft/Phi-3-mini-4k-instruct-q4.gguf", # path to GGUF file
8
- n_ctx=2048, # The max sequence length to use - note that longer sequence lengths require much more resources
9
- n_threads=8, # The number of CPU threads to use, tailor to your system and the resulting performance
10
- n_gpu_layers=0, # The number of layers to offload to GPU, if you have GPU acceleration available. Set to 0 if no GPU acceleration is available on your system.
 
 
 
 
 
 
 
 
 
 
 
 
11
  )
12
 
13
- prompt = "How to explain Internet to a medieval knight?"
14
-
15
- def process_text(prompt):
16
-
17
- # Simple inference example
18
- output = llm(
19
- f"<|user|>\n{prompt}<|end|>\n<|assistant|>",
20
- max_tokens=256, # Generate up to 256 tokens
21
- stop=["<|end|>"],
22
- echo=True, # Whether to echo the prompt
23
- )
24
-
25
- return (output['choices'][0]['text'])
26
-
27
-
28
-
29
 
 
 
30
 
31
 
32
  # Create a Gradio interface
33
  iface = gr.Interface(
34
- fn=process_text,
35
  inputs=gr.Textbox(lines=5, placeholder="Enter your prompt here..."),
36
  outputs=gr.Textbox()
37
  )
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
4
 
5
+ torch.random.manual_seed(0)
6
 
7
+ model = AutoModelForCausalLM.from_pretrained(
8
+ "microsoft/Phi-3-mini-4k-instruct",
9
+ device_map="cuda",
10
+ torch_dtype="auto",
11
+ trust_remote_code=True,
12
+ )
13
+ tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
14
+
15
+ messages = [
16
+ {"role": "user", "content": "Can you provide ways to eat combinations of bananas and dragonfruits?"},
17
+ {"role": "assistant", "content": "Sure! Here are some ways to eat bananas and dragonfruits together: 1. Banana and dragonfruit smoothie: Blend bananas and dragonfruits together with some milk and honey. 2. Banana and dragonfruit salad: Mix sliced bananas and dragonfruits together with some lemon juice and honey."},
18
+ {"role": "user", "content": "What about solving an 2x + 3 = 7 equation?"},
19
+ ]
20
+
21
+ proc_pipe = pipeline(
22
+ "text-generation",
23
+ model=model,
24
+ tokenizer=tokenizer,
25
  )
26
 
27
+ generation_args = {
28
+ "max_new_tokens": 500,
29
+ "return_full_text": False,
30
+ "temperature": 0.0,
31
+ "do_sample": False,
32
+ }
 
 
 
 
 
 
 
 
 
 
33
 
34
+ output = pipe(messages, **generation_args)
35
+ print(output[0]['generated_text'])
36
 
37
 
38
  # Create a Gradio interface
39
  iface = gr.Interface(
40
+ fn=proc_pipe,
41
  inputs=gr.Textbox(lines=5, placeholder="Enter your prompt here..."),
42
  outputs=gr.Textbox()
43
  )