freddyaboulton (HF staff) committed
Commit 5eff8c0 · verified · 1 parent: fed6223

Upload 5 files

Files changed (5)
  1. app.py +27 -0
  2. hf_chatinterface.py +49 -0
  3. hf_stream_demo.py +51 -0
  4. requirements.txt +6 -0
  5. transformers_local.py +77 -0
app.py ADDED
@@ -0,0 +1,27 @@
+ import gradio as gr
+ from pathlib import Path
+
+ from hf_chatinterface import demo as hf_chatinterface
+ from hf_stream_demo import demo as hf_stream
+ from transformers_local import demo as transformers_local
+
+
+ with gr.Blocks() as demo:
+     with gr.Tabs():
+         for file_name, sub_demo, name in [
+             ("hf_chatinterface", hf_chatinterface, "ChatInterface with HF Inference API 🤗"),
+             ("transformers_local", transformers_local, "ChatInterface with Transformers Local 🤗"),
+             ("hf_stream_demo", hf_stream, "Blocks with HF Inference API 🤗"),
+         ]:
+             with gr.Tab(name):
+                 with gr.Tabs():
+                     with gr.Tab("Demo"):
+                         sub_demo.render()
+                     with gr.Tab("Code"):
+                         gr.Code(
+                             value=Path(f"{file_name}.py").read_text(), language="python"
+                         )
+
+
+ if __name__ == "__main__":
+     demo.launch()
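The loop in app.py leans on `Blocks.render()`, which re-renders a demo built in another file inside the layout currently under construction. A minimal standalone sketch of that pattern (the inner demo here is hypothetical):

import gradio as gr

with gr.Blocks() as inner:
    gr.Markdown("Hello from the inner demo")

with gr.Blocks() as outer:
    with gr.Tab("Demo"):
        inner.render()  # re-renders `inner` inside this tab

if __name__ == "__main__":
    outer.launch()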
hf_chatinterface.py ADDED
@@ -0,0 +1,49 @@
+ from huggingface_hub import InferenceClient
+ import gradio as gr
+
+ client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
+
+
+ def respond(
+     message,
+     history: list[tuple[str, str]],
+     system_message,
+     max_tokens=512,  # defaults, since only the system message is exposed as a UI input
+     temperature=0.7,
+     top_p=0.95,
+ ):
+     messages = [{"role": "system", "content": system_message}]
+
+     for val in history:
+         if val[0]:
+             messages.append({"role": "user", "content": val[0]})
+         if val[1]:
+             messages.append({"role": "assistant", "content": val[1]})
+
+     messages.append({"role": "user", "content": message})
+
+     response = ""
+
+     for chunk in client.chat_completion(
+         messages,
+         max_tokens=max_tokens,
+         stream=True,
+         temperature=temperature,
+         top_p=top_p,
+     ):
+         token = chunk.choices[0].delta.content or ""  # the final chunk's delta can be None
+
+         response += token
+         yield response
+
+
+ demo = gr.ChatInterface(
+     respond,
+     chatbot=gr.Chatbot(height=400),
+     additional_inputs=[
+         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
+     ],
+ )
+
+ if __name__ == "__main__":
+     demo.launch()
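For comparison, calling `InferenceClient.chat_completion` without `stream=True` returns the whole reply at once (a full `message` rather than per-token `delta` chunks); a minimal sketch against the same model:

from huggingface_hub import InferenceClient

client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
output = client.chat_completion(
    [{"role": "user", "content": "Say hello in one sentence."}],
    max_tokens=64,
)
# Non-streaming responses expose the complete message, not a delta.
print(output.choices[0].message.content)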
hf_stream_demo.py ADDED
@@ -0,0 +1,51 @@
+ from huggingface_hub import InferenceClient
+ from gradio_agentchatbot import AgentChatbot
+ import gradio as gr
+
+ """
+ For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
+ """
+ client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
+
+
+ def respond(
+     prompt: str,
+     history,
+ ):
+     messages = [{"role": "system", "content": "You are a friendly chatbot"}]
+
+     for val in history:
+         if val[0]:
+             messages.append({"role": "user", "content": val[0]})
+         if val[1]:
+             messages.append({"role": "assistant", "content": val[1]})
+
+     messages.append({"role": "user", "content": prompt})
+     history.append([prompt, None])
+     yield history  # show the user's message immediately, before the reply streams in
+
+     response = ""
+     for message in client.chat_completion(
+         messages,
+         stream=True,
+     ):
+         response += message.choices[0].delta.content or ""
+         history[-1][1] = response
+         yield history
+
+
+ with gr.Blocks() as demo:
+     gr.Markdown("# Chat with Hugging Face Zephyr 7b 🤗")
+     chatbot = AgentChatbot(
+         label="Agent",
+         avatar_images=(
+             None,
+             "https://em-content.zobj.net/source/twitter/376/hugging-face_1f917.png",
+         ),
+     )
+     prompt = gr.Textbox(lines=1, label="Chat Message")
+     prompt.submit(respond, [prompt, chatbot], [chatbot])
+
+
+ if __name__ == "__main__":
+     demo.launch()
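The loop at the top of `respond` converts Gradio's pairwise history into OpenAI-style role/content messages. The same conversion as a standalone helper, sketched with a hypothetical name:

def pairs_to_messages(history, system="You are a friendly chatbot"):
    """Turn [[user, assistant], ...] pairs into role/content dicts."""
    messages = [{"role": "system", "content": system}]
    for user_msg, bot_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if bot_msg:  # the last pair may still have a pending reply (None)
            messages.append({"role": "assistant", "content": bot_msg})
    return messages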
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ transformers
+ torch
+ accelerate
+ huggingface_hub
+ gradio_agentchatbot
+ spaces
transformers_local.py ADDED
@@ -0,0 +1,77 @@
+ import gradio as gr
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+ from threading import Thread
+ import spaces
+
+ tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3")
+ model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3", device_map="auto")
+ terminators = [
+     # Mistral only needs its EOS token; "<|eot_id|>" is a Llama-3 token absent from this vocabulary.
+     tokenizer.eos_token_id,
+ ]
+
+
+ @spaces.GPU(duration=120)
+ def chat_mistral7b_v0dot3(message: str,
+                           history: list,
+                           temperature: float,
+                           max_new_tokens: int
+                           ):
+
+     conversation = []
+     for user, assistant in history:
+         conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
+     conversation.append({"role": "user", "content": message})
+
+     input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt").to(model.device)
+
+     streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
+
+     generate_kwargs = dict(
+         input_ids=input_ids,
+         streamer=streamer,
+         max_new_tokens=max_new_tokens,
+         do_sample=True,
+         temperature=temperature,
+         eos_token_id=terminators,
+     )
+     # Force greedy decoding when temperature is 0: sampling at temperature 0 would crash.
+     if temperature == 0:
+         generate_kwargs['do_sample'] = False
+
+     t = Thread(target=model.generate, kwargs=generate_kwargs)
+     t.start()
+
+     outputs = []
+     for text in streamer:
+         outputs.append(text)
+         # Yield the accumulated text so the UI streams token by token.
+         yield "".join(outputs)
+
+
+ # Gradio UI
+ with gr.Blocks() as demo:
+     gr.Markdown("# ChatInterface with Mistral and Transformers 🤗")
+     gr.ChatInterface(
+         fn=chat_mistral7b_v0dot3,
+         fill_height=True,
+         additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
+         additional_inputs=[
+             gr.Slider(minimum=0,
+                       maximum=1,
+                       step=0.1,
+                       value=0.95,
+                       label="Temperature",
+                       render=False),
+             gr.Slider(minimum=128,
+                       maximum=4096,
+                       step=1,
+                       value=512,
+                       label="Max new tokens",
+                       render=False),
+         ],
+     )
+
+
+ if __name__ == "__main__":
+     demo.launch()
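The core pattern in transformers_local.py is running `model.generate` on a worker thread while the caller iterates the `TextIteratorStreamer`. The same pattern outside Gradio, sketched with a small model so it runs on CPU:

from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

tok = AutoTokenizer.from_pretrained("gpt2")
lm = AutoModelForCausalLM.from_pretrained("gpt2")

streamer = TextIteratorStreamer(tok, skip_prompt=True, skip_special_tokens=True)
inputs = tok("Once upon a time", return_tensors="pt")

# generate() blocks until done, so it runs on a worker thread that feeds the streamer.
Thread(target=lm.generate, kwargs=dict(**inputs, streamer=streamer, max_new_tokens=30)).start()

for piece in streamer:  # yields decoded text as soon as new tokens arrive
    print(piece, end="", flush=True)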