Update app.py
app.py
CHANGED
@@ -100,33 +100,6 @@ with gr.Blocks() as demo:
             interactive=True,
             info="Higher values produce more diverse outputs",
         )
-        top_p = gr.Slider(
-            label="Top-p (nucleus) sampling",
-            value=0.40,
-            minimum=0.0,
-            maximum=1,
-            step=0.05,
-            interactive=True,
-            info="Higher values sample more low-probability tokens",
-        )
-        top_k = gr.Slider(
-            label="Top-k sampling",
-            value=20,
-            minimum=1,
-            maximum=100,
-            step=1,
-            interactive=True,
-            info="Sample from the top_k most likely tokens",
-        )
-        repetition_penalty = gr.Slider(
-            label="Repetition penalty",
-            value=1.2,
-            minimum=1.0,
-            maximum=2.0,
-            step=0.05,
-            interactive=True,
-            info="Penalize repeated tokens",
-        )

     # Generation inference
     def generate(
@@ -134,21 +107,14 @@ with gr.Blocks() as demo:
         history,
         max_new_tokens: int,
         temperature: float,
-        top_p: float,
-        top_k: int,
-        repetition_penalty: float,
     ):
         generation_config = {
             "max_new_tokens": max_new_tokens,
             "temperature": temperature,
-            "top_p": top_p,
-            "top_k": top_k,
-            "repetition_penalty": repetition_penalty,
         }
         inference = pipe(sequences=message, streaming=True, **generation_config)
         history[-1][1] += message
         for token in inference:
-            print(token.generations[0].text)
             history[-1][1] += token.generations[0].text
             yield history
         print(pipe.timer_manager)
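For reference, the streaming generator after this change reduces to the sketch below, assembled from the hunk's context lines. The first parameter is not visible in the hunk; it is assumed to be message, since the function body reads it.

# Sketch of the post-change generator (not a verbatim copy of app.py).
# `pipe` is the DeepSparse text-generation pipeline created earlier in the file;
# the `message` parameter is an assumption, as the hunk starts one line below it.
def generate(
    message,
    history,
    max_new_tokens: int,
    temperature: float,
):
    generation_config = {
        "max_new_tokens": max_new_tokens,
        "temperature": temperature,
    }
    inference = pipe(sequences=message, streaming=True, **generation_config)
    history[-1][1] += message
    for token in inference:
        history[-1][1] += token.generations[0].text
        yield history
    print(pipe.timer_manager)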
@@ -172,9 +138,6 @@ with gr.Blocks() as demo:
             chatbot,
             max_new_tokens,
             temperature,
-            top_p,
-            top_k,
-            repetition_penalty,
         ],
         outputs=[chatbot],
         api_name=False,
@@ -199,9 +162,6 @@ with gr.Blocks() as demo:
             chatbot,
             max_new_tokens,
             temperature,
-            top_p,
-            top_k,
-            repetition_penalty,
         ],
         outputs=[chatbot],
         api_name=False,
@@ -226,9 +186,6 @@ with gr.Blocks() as demo:
             chatbot,
             max_new_tokens,
             temperature,
-            top_p,
-            top_k,
-            repetition_penalty,
         ],
         outputs=[chatbot],
         api_name=False,
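The three identical input-list edits above belong to Gradio event bindings whose opening lines fall outside the hunks. A minimal wiring sketch under that assumption, with illustrative component and event names (textbox, .submit) that are not taken from the file:

# Hypothetical wiring: the trimmed input list feeds the streaming generator,
# which yields the updated chat history back into the Chatbot component.
textbox.submit(
    fn=generate,
    inputs=[
        textbox,          # user message
        chatbot,          # chat history shown in the UI
        max_new_tokens,
        temperature,
    ],
    outputs=[chatbot],
    api_name=False,
)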