Spaces:

charanhu
/

TinyLlama-1B

Sleeping

charanhu committed on Dec 30, 2023

Commit

3ac57f8

1 Parent(s): bb14ba0

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -15,7 +15,7 @@ class StopOnTokens(StoppingCriteria):
                 return True
         return False
-def predict(message, history):
     history_transformer_format = history + [[message, ""]]
     stop = StopOnTokens()
@@ -24,15 +24,14 @@ def predict(message, history):
                 for item in history_transformer_format])
     model_inputs = tokenizer([messages], return_tensors="pt")
-    streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
         model_inputs,
-        streamer=streamer,
-        max_new_tokens=1024,
         do_sample=True,
         top_p=1,
         top_k=50,
-        temperature=1.0,
         num_beams=1,
         stopping_criteria=StoppingCriteriaList([stop])
         )
@@ -46,4 +45,12 @@ def predict(message, history):
             yield partial_message
-gr.ChatInterface(predict).queue().launch()

                 return True
         return False
+def predict(message, history, temperature, max_new_tokens, min_new_tokens):
     history_transformer_format = history + [[message, ""]]
     stop = StopOnTokens()
                 for item in history_transformer_format])
     model_inputs = tokenizer([messages], return_tensors="pt")
     generate_kwargs = dict(
         model_inputs,
+        max_new_tokens=int(max_new_tokens),
+        min_new_tokens=int(min_new_tokens),
         do_sample=True,
         top_p=1,
         top_k=50,
+        temperature=float(temperature),
         num_beams=1,
         stopping_criteria=StoppingCriteriaList([stop])
         )
             yield partial_message
+iface = gr.ChatInterface(
+    fn=predict,
+    inputs=["text", "text", gr.Slider(minimum=0.1, maximum=2.0, value=1.0, label="Temperature"),
+            gr.Slider(minimum=1, maximum=2048, value=1024, label="Max Tokens"),
+            gr.Slider(minimum=1, maximum=1024, value=1, label="Min Tokens")],
+    outputs="text"
+)
+iface.queue().launch()