Mahavaury2 committed (verified)
Commit bcae2af · Parent(s): 0826c6b

Update app.py

Files changed (1): app.py (+46 -7)
app.py CHANGED

@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+
 import os
 from collections.abc import Iterator
 from threading import Thread
@@ -28,6 +30,8 @@ if not torch.cuda.is_available():
     "\n<p style='color:red;'>Running on CPU - This is likely too large to run effectively.</p>"
 )
 
+MAX_MAX_NEW_TOKENS = 2048
+DEFAULT_MAX_NEW_TOKENS = 1024
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 
 #
@@ -37,15 +41,13 @@ if torch.cuda.is_available():
 model_id = "mistralai/Mistral-7B-Instruct-v0.3"
 tokenizer = AutoTokenizer.from_pretrained(
     model_id,
-    trust_remote_code=True,  # Might be needed for custom code
-    use_auth_token=True
+    trust_remote_code=True  # Might be needed for custom code
 )
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     torch_dtype=torch.float16,
     device_map="auto",
-    trust_remote_code=True,
-    use_auth_token=True
+    trust_remote_code=True
 )
 
 def generate(
@@ -98,12 +100,49 @@ def generate(
     yield "".join(outputs)
 
 #
-# 4) Build the Chat Interface without additional inputs
+# 4) Build the Chat Interface with extra sliders
 #
 demo = gr.ChatInterface(
     fn=generate,
     description=DESCRIPTION,
-    css=CUSTOM_CSS,
+    css=CUSTOM_CSS,  # Use our pastel gradient
+    additional_inputs=[
+        gr.Slider(
+            label="Max new tokens",
+            minimum=1,
+            maximum=MAX_MAX_NEW_TOKENS,
+            step=1,
+            value=DEFAULT_MAX_NEW_TOKENS,
+        ),
+        gr.Slider(
+            label="Temperature",
+            minimum=0.1,
+            maximum=4.0,
+            step=0.1,
+            value=0.6,
+        ),
+        gr.Slider(
+            label="Top-p (nucleus sampling)",
+            minimum=0.05,
+            maximum=1.0,
+            step=0.05,
+            value=0.9,
+        ),
+        gr.Slider(
+            label="Top-k",
+            minimum=1,
+            maximum=1000,
+            step=1,
+            value=50,
+        ),
+        gr.Slider(
+            label="Repetition penalty",
+            minimum=1.0,
+            maximum=2.0,
+            step=0.05,
+            value=1.2,
+        ),
+    ],
     stop_btn=None,
     examples=[
         ["Hello there! How are you doing?"],
@@ -116,4 +155,4 @@ demo = gr.ChatInterface(
 )
 
 if __name__ == "__main__":
-    demo.queue(max_size=20).launch(share=True)
+    demo.queue(max_size=20).launch(share=True)
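A note on the dropped use_auth_token=True arguments: recent versions of transformers deprecate use_auth_token in favor of token, so removing it is reasonable. If the checkpoint is gated, loading still needs credentials; below is a minimal sketch, assuming an HF_TOKEN environment variable (an assumption for illustration, not something this commit defines):

    import os

    from transformers import AutoModelForCausalLM, AutoTokenizer

    model_id = "mistralai/Mistral-7B-Instruct-v0.3"

    # `token` supersedes the deprecated `use_auth_token`; HF_TOKEN is an
    # assumed environment variable here, not part of this commit.
    hf_token = os.environ.get("HF_TOKEN")

    tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
    model = AutoModelForCausalLM.from_pretrained(model_id, token=hf_token)

With token=None, transformers falls back to any credentials cached by huggingface-cli login, so dropping the argument entirely, as this commit does, also works on a machine that is already logged in.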
 
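The five new sliders take effect only because gr.ChatInterface passes each value in additional_inputs to fn as an extra positional argument after the message and the chat history. The body of generate is elided in this diff; below is a minimal sketch of a compatible signature, built on transformers' TextIteratorStreamer and the Thread import already present in app.py. All parameter names, the messages-format history, and the prompt handling are assumptions rather than the Space's actual code; tokenizer, model, MAX_INPUT_TOKEN_LENGTH, and DEFAULT_MAX_NEW_TOKENS refer to the module-level definitions above.

    from collections.abc import Iterator
    from threading import Thread

    from transformers import TextIteratorStreamer

    def generate(
        message: str,
        chat_history: list[dict],  # assumed messages-format history
        max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS,
        temperature: float = 0.6,
        top_p: float = 0.9,
        top_k: int = 50,
        repetition_penalty: float = 1.2,
    ) -> Iterator[str]:
        # Assemble the conversation and trim the prompt so the model never
        # sees more than MAX_INPUT_TOKEN_LENGTH tokens.
        conversation = [*chat_history, {"role": "user", "content": message}]
        input_ids = tokenizer.apply_chat_template(
            conversation, add_generation_prompt=True, return_tensors="pt"
        )
        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:].to(model.device)

        # Run generation on a background thread and stream partial text back.
        streamer = TextIteratorStreamer(
            tokenizer, skip_prompt=True, skip_special_tokens=True
        )
        Thread(
            target=model.generate,
            kwargs=dict(
                input_ids=input_ids,
                streamer=streamer,
                max_new_tokens=max_new_tokens,
                do_sample=True,
                temperature=temperature,
                top_p=top_p,
                top_k=top_k,
                repetition_penalty=repetition_penalty,
            ),
        ).start()

        outputs = []
        for text in streamer:
            outputs.append(text)
            yield "".join(outputs)

The defaults mirror each slider's value, so the function still behaves sensibly if called without the extras, and the final yield "".join(outputs) matches the line visible in the diff.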