Plat committed on
Commit
cc9c601
·
1 Parent(s): bb80c35

feat: tanuki 8b instruct

Browse files
Files changed (2) hide show
  1. app.py +58 -20
  2. requirements.txt +5 -1
app.py CHANGED
@@ -1,12 +1,30 @@
 
 
 
 
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
3
 
4
- """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
- """
7
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
 
 
10
  def respond(
11
  message,
12
  history: list[tuple[str, str]],
@@ -14,6 +32,7 @@ def respond(
14
  max_tokens,
15
  temperature,
16
  top_p,
 
17
  ):
18
  messages = [{"role": "system", "content": system_message}]
19
 
@@ -25,27 +44,39 @@ def respond(
25
 
26
  messages.append({"role": "user", "content": message})
27
 
28
- response = ""
 
 
29
 
30
- for message in client.chat_completion(
31
- messages,
32
- max_tokens=max_tokens,
33
- stream=True,
 
 
 
 
34
  temperature=temperature,
 
35
  top_p=top_p,
36
- ):
37
- token = message.choices[0].delta.content
 
 
 
 
 
 
 
38
 
39
- response += token
40
- yield response
41
 
42
- """
43
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
44
- """
45
  demo = gr.ChatInterface(
46
  respond,
47
  additional_inputs=[
48
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
 
 
 
49
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
50
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
51
  gr.Slider(
@@ -53,11 +84,18 @@ demo = gr.ChatInterface(
53
  maximum=1.0,
54
  value=0.95,
55
  step=0.05,
56
- label="Top-p (nucleus sampling)",
57
  ),
 
 
 
 
 
 
 
58
  ],
59
  )
60
 
61
 
62
  if __name__ == "__main__":
63
- demo.launch()
 
1
+ import torch
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
3
+ from threading import Thread
4
+
5
  import gradio as gr
 
6
 
7
try:
    import spaces
except ImportError:
    # Only a missing `spaces` package should trigger the fallback. A bare
    # `except:` would also hide KeyboardInterrupt/SystemExit and genuine
    # errors raised while an installed `spaces` package initialises.

    class spaces:
        """No-op stand-in used when the `spaces` package cannot be imported.

        It exposes only the `GPU` decorator factory that this module uses,
        so functions decorated with `@spaces.GPU(duration=...)` still work.
        """

        @staticmethod
        def GPU(duration: int):
            """Return a decorator that hands back the function unchanged.

            Args:
                duration: Accepted for call compatibility with the
                    `@spaces.GPU(duration=...)` usage; ignored by this stub.

            Returns:
                An identity decorator: calling it with an object returns
                that same object.
            """
            return lambda x: x
15
+
16
+
17
+ MODEL_NAME = "hatakeyama-llm-team/Tanuki-8B-Instruct"
18
+
19
+ model = AutoModelForCausalLM.from_pretrained(
20
+ MODEL_NAME, load_in_8bit=True, device_map="auto"
21
+ )
22
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
23
+
24
+ print(model.hf_device_map)
25
 
26
 
27
+ @spaces.GPU(duration=10)
28
  def respond(
29
  message,
30
  history: list[tuple[str, str]],
 
32
  max_tokens,
33
  temperature,
34
  top_p,
35
+ top_k,
36
  ):
37
  messages = [{"role": "system", "content": system_message}]
38
 
 
44
 
45
  messages.append({"role": "user", "content": message})
46
 
47
+ tokenized_input = tokenizer.apply_chat_template(
48
+ messages, add_generation_prompt=True, tokenize=True, return_tensors="pt"
49
+ ).to(model.device)
50
 
51
+ streamer = TextIteratorStreamer(
52
+ tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
53
+ )
54
+ generate_kwargs = dict(
55
+ input_ids=tokenized_input,
56
+ streamer=streamer,
57
+ max_new_tokens=max_tokens,
58
+ do_sample=True,
59
  temperature=temperature,
60
+ top_k=top_k,
61
  top_p=top_p,
62
+ num_beams=1,
63
+ )
64
+ t = Thread(target=model.generate, kwargs=generate_kwargs)
65
+ t.start()
66
+
67
+ partial_message = ""
68
+ for new_token in streamer:
69
+ partial_message += new_token
70
+ yield partial_message
71
 
 
 
72
 
 
 
 
73
  demo = gr.ChatInterface(
74
  respond,
75
  additional_inputs=[
76
+ gr.Textbox(
77
+ value="以下は、タスクを説明する指示と、文脈のある入力の組み合わせです。要求を適切に満たす応答を書きなさい。",
78
+ label="システムプロンプト",
79
+ ),
80
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
81
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
82
  gr.Slider(
 
84
  maximum=1.0,
85
  value=0.95,
86
  step=0.05,
87
+ label="Top-p",
88
  ),
89
+ gr.Slider(minimum=1, maximum=2000, value=200, step=10, label="Top-k"),
90
+ ],
91
+ examples=[
92
+ ["たぬきってなんですか?"],
93
+ ["情けは人の為ならずとはどういう意味ですか?"],
94
+ ["明晰夢とはなんですか?"],
95
+ ["シュレディンガー方程式とシュレディンガーの猫はどのような関係がありますか?"],
96
  ],
97
  )
98
 
99
 
100
  if __name__ == "__main__":
101
+ demo.launch()
requirements.txt CHANGED
@@ -1 +1,5 @@
1
- huggingface_hub==0.22.2
 
 
 
 
 
1
+ torch==2.3.0
2
+ accelerate==0.30.1
3
+ transformers==4.41.2
4
+ optimum-quanto==0.2.1
5
+ spaces==0.28.3