likewendy committed
Commit 2323cb2 · Parent: 87a26ee
Files changed (3):
  1. README.md +5 -0
  2. app.py +19 -15
  3. bpp.py +9 -5
README.md CHANGED
@@ -8,6 +8,11 @@ sdk_version: 5.0.1
  app_file: app.py
  pinned: false
  license: gpl-3.0
+ hf_oauth: true
+ hf_oauth_scopes:
+ - read-repos
+ - write-repos
+ - manage-repos
  ---

  An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).
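The new front-matter keys enable Sign in with Hugging Face for the Space and request repository scopes. For context, a minimal sketch of how a handler on an OAuth-enabled Space can pick up the visitor's credentials: Gradio injects `gr.OAuthProfile`/`gr.OAuthToken` parameters based on their type annotations, and the `greet` function and its wiring are illustrative, not part of this commit.

```python
import gradio as gr
from huggingface_hub import whoami

def greet(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None) -> str:
    # On a Space with hf_oauth: true, Gradio fills these parameters from the
    # session; both are None when nobody is signed in.
    if profile is None or oauth_token is None:
        return "Please sign in."
    # oauth_token.token is scoped to what the README front matter declares
    # (read-repos / write-repos / manage-repos here).
    return f"Signed in as {whoami(token=oauth_token.token)['name']}"

with gr.Blocks() as demo:
    gr.LoginButton()
    status = gr.Textbox(label="Status")
    gr.Button("Check login").click(greet, inputs=None, outputs=status)

demo.launch()
```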
app.py CHANGED
@@ -1,3 +1,4 @@
+ os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
  import gradio as gr
  from llama_cpp import Llama

@@ -42,21 +43,24 @@ def respond(
          yield partial_message

  # Gradio interface
- demo = gr.ChatInterface(
-     respond,
-     additional_inputs=[
-         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-         gr.Slider(
-             minimum=0.1,
-             maximum=1.0,
-             value=0.95,
-             step=0.05,
-             label="Top-p (nucleus sampling)"
-         ),
-     ],
- )
+ with gr.Blocks() as demo:
+     gr.Markdown("You must be logged in to use GGUF-my-lora.")
+     gr.LoginButton(min_width=250)
+     gr.ChatInterface(
+         respond,
+         additional_inputs=[
+             gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
+             gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+             gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+             gr.Slider(
+                 minimum=0.1,
+                 maximum=1.0,
+                 value=0.95,
+                 step=0.05,
+                 label="Top-p (nucleus sampling)"
+             ),
+         ],
+     )

  if __name__ == "__main__":
      demo.launch()
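Two details matter in this hunk: the added first line disables Gradio telemetry but relies on `import os`, which the diff never adds, and `respond` must `yield` progressively longer strings for `gr.ChatInterface` to stream the reply. A sketch of a compatible `respond`, assuming the default tuple-style chat history and a GGUF model loaded via llama_cpp (`model_path` is hypothetical):

```python
import os
os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"  # requires import os, missing from the hunk

import gradio as gr
from llama_cpp import Llama

llm = Llama(model_path="model.gguf", n_ctx=2048)  # hypothetical path

def respond(message, history, system_message, max_tokens, temperature, top_p):
    # Rebuild the transcript in the chat format llama_cpp expects.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})

    partial_message = ""
    # stream=True yields deltas; accumulating and re-yielding lets
    # gr.ChatInterface render the reply token by token.
    for chunk in llm.create_chat_completion(
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=True,
    ):
        delta = chunk["choices"][0]["delta"]
        if "content" in delta:
            partial_message += delta["content"]
            yield partial_message
```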
bpp.py CHANGED
@@ -1,6 +1,6 @@
  import spaces
  import torch
- from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, TextIteratorStreamer

  import os

@@ -30,16 +30,20 @@ pipe = pipeline(
      tokenizer=tokenizer,
  )

+ streamer = TextIteratorStreamer(tokenizer)
+
  generation_args = {
      "max_new_tokens": 500,
      "return_full_text": False,
      "temperature": 0.0,
      "do_sample": False,
+     "streamer": streamer,
  }

  @spaces.GPU
  def tuili():
-     output = pipe(messages, **generation_args)
-     return output
-
- print(tuili()[0]['generated_text'])
+     model.generate(messages, **generation_args)
+
+ tuili()
+ for new_text in streamer:
+     print(new_text)
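As committed, `tuili()` hands the raw chat `messages` to `model.generate()`, which expects token ids, and the streamer is only drained after generation has already finished. `TextIteratorStreamer` is meant to be consumed while a worker thread generates. A sketch of that documented pattern, with a hypothetical model id standing in for whatever the elided top of bpp.py loads:

```python
from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Hypothetical stand-in; bpp.py builds its model/tokenizer/messages earlier.
model_id = "Qwen/Qwen2.5-0.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

messages = [{"role": "user", "content": "Hello!"}]

# generate() takes token ids, not chat dicts: apply the chat template first.
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
)

# skip_prompt drops the echoed prompt, mirroring return_full_text=False.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

generation_args = {
    "max_new_tokens": 500,
    "do_sample": False,
    "streamer": streamer,
}

# Generate in a background thread so the main thread can drain the
# streamer while tokens are still being produced.
thread = Thread(target=model.generate, args=(input_ids,), kwargs=generation_args)
thread.start()
for new_text in streamer:
    print(new_text, end="", flush=True)
thread.join()
```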