likewendy committed
Commit 2323cb2 · Parent: 87a26ee
Files changed (3):
  1. README.md +5 -0
  2. app.py +19 -15
  3. bpp.py +9 -5
README.md CHANGED
@@ -8,6 +8,11 @@ sdk_version: 5.0.1
  app_file: app.py
  pinned: false
  license: gpl-3.0
+ hf_oauth: true
+ hf_oauth_scopes:
+ - read-repos
+ - write-repos
+ - manage-repos
  ---

  An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).
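The new front-matter keys enable Sign in with Hugging Face for the Space and request repository scopes. For context, a minimal sketch of how a handler on an OAuth-enabled Space can pick up the visitor's credentials: Gradio injects `gr.OAuthProfile`/`gr.OAuthToken` parameters based on their type annotations, and the `greet` function and its wiring are illustrative, not part of this commit.

```python
import gradio as gr
from huggingface_hub import whoami

def greet(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None) -> str:
    # On a Space with hf_oauth: true, Gradio fills these parameters from the
    # session; both are None when nobody is signed in.
    if profile is None or oauth_token is None:
        return "Please sign in."
    # oauth_token.token is scoped to what the README front matter declares
    # (read-repos / write-repos / manage-repos here).
    return f"Signed in as {whoami(token=oauth_token.token)['name']}"

with gr.Blocks() as demo:
    gr.LoginButton()
    status = gr.Textbox(label="Status")
    gr.Button("Check login").click(greet, inputs=None, outputs=status)

demo.launch()
```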
app.py CHANGED
@@ -1,3 +1,4 @@
+ os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
  import gradio as gr
  from llama_cpp import Llama

@@ -42,21 +43,24 @@ def respond(
          yield partial_message

  # Gradio interface
- demo = gr.ChatInterface(
-     respond,
-     additional_inputs=[
-         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-         gr.Slider(
-             minimum=0.1,
-             maximum=1.0,
-             value=0.95,
-             step=0.05,
-             label="Top-p (nucleus sampling)"
-         ),
-     ],
- )
+ with gr.Blocks() as demo:
+     gr.Markdown("You must be logged in to use GGUF-my-lora.")
+     gr.LoginButton(min_width=250)
+     gr.ChatInterface(
+         respond,
+         additional_inputs=[
+             gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
+             gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+             gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+             gr.Slider(
+                 minimum=0.1,
+                 maximum=1.0,
+                 value=0.95,
+                 step=0.05,
+                 label="Top-p (nucleus sampling)"
+             ),
+         ],
+     )

  if __name__ == "__main__":
      demo.launch()
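Two details matter in this hunk: the added first line disables Gradio telemetry but relies on `import os`, which the diff never adds, and `respond` must `yield` progressively longer strings for `gr.ChatInterface` to stream the reply. A sketch of a compatible `respond`, assuming the default tuple-style chat history and a GGUF model loaded via llama_cpp (`model_path` is hypothetical):

```python
import os
os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"  # requires import os, missing from the hunk

import gradio as gr
from llama_cpp import Llama

llm = Llama(model_path="model.gguf", n_ctx=2048)  # hypothetical path

def respond(message, history, system_message, max_tokens, temperature, top_p):
    # Rebuild the transcript in the chat format llama_cpp expects.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})

    partial_message = ""
    # stream=True yields deltas; accumulating and re-yielding lets
    # gr.ChatInterface render the reply token by token.
    for chunk in llm.create_chat_completion(
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=True,
    ):
        delta = chunk["choices"][0]["delta"]
        if "content" in delta:
            partial_message += delta["content"]
            yield partial_message
```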
bpp.py CHANGED
@@ -1,6 +1,6 @@
  import spaces
  import torch
- from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, TextIteratorStreamer

  import os

@@ -30,16 +30,20 @@ pipe = pipeline(
      tokenizer=tokenizer,
  )

+ streamer = TextIteratorStreamer(tokenizer)
+
  generation_args = {
      "max_new_tokens": 500,
      "return_full_text": False,
      "temperature": 0.0,
      "do_sample": False,
+     "streamer": streamer,
  }

  @spaces.GPU
  def tuili():
-     output = pipe(messages, **generation_args)
-     return output
-
- print(tuili()[0]['generated_text'])
+     model.generate(messages, **generation_args)
+
+ tuili()
+ for new_text in streamer:
+     print(new_text)
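As committed, `tuili()` hands the raw chat `messages` to `model.generate()`, which expects token ids, and the streamer is only drained after generation has already finished. `TextIteratorStreamer` is meant to be consumed while a worker thread generates. A sketch of that documented pattern, with a hypothetical model id standing in for whatever the elided top of bpp.py loads:

```python
from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Hypothetical stand-in; bpp.py builds its model/tokenizer/messages earlier.
model_id = "Qwen/Qwen2.5-0.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

messages = [{"role": "user", "content": "Hello!"}]

# generate() takes token ids, not chat dicts: apply the chat template first.
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
)

# skip_prompt drops the echoed prompt, mirroring return_full_text=False.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

generation_args = {
    "max_new_tokens": 500,
    "do_sample": False,
    "streamer": streamer,
}

# Generate in a background thread so the main thread can drain the
# streamer while tokens are still being produced.
thread = Thread(target=model.generate, args=(input_ids,), kwargs=generation_args)
thread.start()
for new_text in streamer:
    print(new_text, end="", flush=True)
thread.join()
```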