mgoin committed
Commit 0175c44 · 1 Parent(s): d54ef7f

Update app.py

Files changed (1)
  1. app.py +0 -43
app.py CHANGED
@@ -100,33 +100,6 @@ with gr.Blocks() as demo:
             interactive=True,
             info="Higher values produce more diverse outputs",
         )
-        top_p = gr.Slider(
-            label="Top-p (nucleus) sampling",
-            value=0.40,
-            minimum=0.0,
-            maximum=1,
-            step=0.05,
-            interactive=True,
-            info="Higher values sample more low-probability tokens",
-        )
-        top_k = gr.Slider(
-            label="Top-k sampling",
-            value=20,
-            minimum=1,
-            maximum=100,
-            step=1,
-            interactive=True,
-            info="Sample from the top_k most likely tokens",
-        )
-        repetition_penalty = gr.Slider(
-            label="Repetition penalty",
-            value=1.2,
-            minimum=1.0,
-            maximum=2.0,
-            step=0.05,
-            interactive=True,
-            info="Penalize repeated tokens",
-        )

     # Generation inference
     def generate(
@@ -134,21 +107,14 @@ with gr.Blocks() as demo:
         history,
         max_new_tokens: int,
         temperature: float,
-        top_p: float,
-        top_k: int,
-        repetition_penalty: float,
     ):
         generation_config = {
             "max_new_tokens": max_new_tokens,
             "temperature": temperature,
-            "top_p": top_p,
-            "top_k": top_k,
-            "repetition_penalty": repetition_penalty,
         }
         inference = pipe(sequences=message, streaming=True, **generation_config)
         history[-1][1] += message
         for token in inference:
-            print(token.generations[0].text)
             history[-1][1] += token.generations[0].text
             yield history
         print(pipe.timer_manager)
@@ -172,9 +138,6 @@ with gr.Blocks() as demo:
             chatbot,
             max_new_tokens,
             temperature,
-            top_p,
-            top_k,
-            repetition_penalty,
         ],
         outputs=[chatbot],
         api_name=False,
@@ -199,9 +162,6 @@ with gr.Blocks() as demo:
             chatbot,
             max_new_tokens,
             temperature,
-            top_p,
-            top_k,
-            repetition_penalty,
         ],
         outputs=[chatbot],
         api_name=False,
@@ -226,9 +186,6 @@ with gr.Blocks() as demo:
             chatbot,
             max_new_tokens,
             temperature,
-            top_p,
-            top_k,
-            repetition_penalty,
         ],
         outputs=[chatbot],
         api_name=False,
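
For reference, the generate callback after this commit reduces to the sketch below. This is a reconstruction from the hunks above, not the file itself: the message parameter (presumably old line 133, which falls between the first two hunks) does not appear in the diff and is inferred from its use in the body, indentation is approximate, and pipe is assumed to be the DeepSparse text-generation pipeline constructed earlier in app.py.

# Minimal sketch of the post-commit callback, assembled from the diff above.
# Assumptions: `message` is the first parameter (it is used in the body but not
# shown in the diff), and `pipe` is the DeepSparse text-generation pipeline
# created earlier in app.py.
def generate(
    message,
    history,
    max_new_tokens: int,
    temperature: float,
):
    # Only the two remaining sampling knobs are forwarded to the pipeline call.
    generation_config = {
        "max_new_tokens": max_new_tokens,
        "temperature": temperature,
    }
    # Streaming inference: the pipeline yields partial generations as they arrive.
    inference = pipe(sequences=message, streaming=True, **generation_config)
    history[-1][1] += message
    for token in inference:
        # Append each streamed chunk to the latest chat turn and re-render the chatbot.
        history[-1][1] += token.generations[0].text
        yield history
    print(pipe.timer_manager)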
 