richardkimsm89 commited on
Commit
12fb4a0
·
verified ·
1 Parent(s): 8dbe12b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -1
app.py CHANGED
@@ -3,6 +3,61 @@
3
  import gradio as gr
4
  from huggingface_hub import InferenceClient
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  model = "google/gemma-2-27b-it"
7
  #model = "google/gemma-2-9b-it"
8
  #model = "google/gemma-2-2b-it"
@@ -58,7 +113,6 @@ app = gr.ChatInterface(
58
  if __name__ == "__main__":
59
  app.launch()
60
 
61
- """
62
  # Pipeline
63
 
64
  import gradio as gr
 
3
  import gradio as gr
4
  from huggingface_hub import InferenceClient
5
 
6
# Model served through the Hugging Face Inference API, and the client bound to it.
model = "google/gemma-2-27b-it"
client = InferenceClient(model)
8
+
9
def fn_text(
    prompt,
    history,
    #system_prompt,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion for *prompt* given the running *history*.

    Parameters
    ----------
    prompt : str
        The user's newest message.
    history : list[dict]
        Prior turns in OpenAI ``{"role", "content"}`` message format
        (Gradio ``type="messages"`` convention — assumed; confirm against the
        ChatInterface wiring).
    max_tokens, temperature, top_p :
        Sampling controls forwarded verbatim to the inference endpoint.

    Yields
    ------
    str
        The cumulative assistant response so far, one yield per stream chunk.
    """
    # Build the message list without mutating the history object Gradio owns;
    # the original appended in place, which leaks the user turn back into
    # Gradio's copy of the conversation state.
    messages = history + [{"role": "user", "content": prompt}]

    stream = client.chat.completions.create(
        model=model,
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=True,
    )

    response = ""
    for chunk in stream:
        # Some providers emit keep-alive chunks with no choices; skip them.
        if not chunk.choices:
            continue
        # delta.content is None on role/stop chunks — coalesce to "".
        response += chunk.choices[0].delta.content or ""
        yield response
33
+
34
# Chat tab: streams completions from fn_text; sliders expose the sampling knobs.
app_text = gr.ChatInterface(
    fn=fn_text,
    type="messages",
    additional_inputs=[
        #gr.Textbox(value="You are a helpful assistant.", label="System Prompt"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max Tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
    ],
    title="Google Gemma",
    description=model,
)
46
+
47
# Bind `app` to the interface itself, not to the return value of .launch()
# as before — that binding made the name useless for a later
# `if __name__ == "__main__": app.launch()` guard (which had to be commented
# out). Launching at module import is kept, as Hugging Face Spaces expects it.
app = gr.TabbedInterface(
    [app_text],
    ["Text"],
)
app.launch()
54
+
55
+ """
56
+ # Inference
57
+
58
+ import gradio as gr
59
+ from huggingface_hub import InferenceClient
60
+
61
  model = "google/gemma-2-27b-it"
62
  #model = "google/gemma-2-9b-it"
63
  #model = "google/gemma-2-2b-it"
 
113
  if __name__ == "__main__":
114
  app.launch()
115
 
 
116
  # Pipeline
117
 
118
  import gradio as gr