Update app.py
app.py CHANGED
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+
 import os
 from collections.abc import Iterator
 from threading import Thread
@@ -28,6 +30,8 @@ if not torch.cuda.is_available():
         "\n<p style='color:red;'>Running on CPU - This is likely too large to run effectively.</p>"
     )
 
+MAX_MAX_NEW_TOKENS = 2048
+DEFAULT_MAX_NEW_TOKENS = 1024
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 
 #
@@ -37,15 +41,13 @@ if torch.cuda.is_available():
     model_id = "mistralai/Mistral-7B-Instruct-v0.3"
     tokenizer = AutoTokenizer.from_pretrained(
         model_id,
-        trust_remote_code=True
-        use_auth_token=True
+        trust_remote_code=True  # Might be needed for custom code
     )
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
         torch_dtype=torch.float16,
         device_map="auto",
-        trust_remote_code=True
-        use_auth_token=True
+        trust_remote_code=True
     )
 
 def generate(
@@ -98,12 +100,49 @@ def generate(
         yield "".join(outputs)
 
 #
-# 4) Build the Chat Interface
+# 4) Build the Chat Interface with extra sliders
 #
 demo = gr.ChatInterface(
     fn=generate,
     description=DESCRIPTION,
-    css=CUSTOM_CSS,
+    css=CUSTOM_CSS,  # Use our pastel gradient
+    additional_inputs=[
+        gr.Slider(
+            label="Max new tokens",
+            minimum=1,
+            maximum=MAX_MAX_NEW_TOKENS,
+            step=1,
+            value=DEFAULT_MAX_NEW_TOKENS,
+        ),
+        gr.Slider(
+            label="Temperature",
+            minimum=0.1,
+            maximum=4.0,
+            step=0.1,
+            value=0.6,
+        ),
+        gr.Slider(
+            label="Top-p (nucleus sampling)",
+            minimum=0.05,
+            maximum=1.0,
+            step=0.05,
+            value=0.9,
+        ),
+        gr.Slider(
+            label="Top-k",
+            minimum=1,
+            maximum=1000,
+            step=1,
+            value=50,
+        ),
+        gr.Slider(
+            label="Repetition penalty",
+            minimum=1.0,
+            maximum=2.0,
+            step=0.05,
+            value=1.2,
+        ),
+    ],
     stop_btn=None,
     examples=[
         ["Hello there! How are you doing?"],
@@ -116,4 +155,4 @@ demo = gr.ChatInterface(
 )
 
 if __name__ == "__main__":
-    demo.queue(max_size=20).launch(share=True)
+    demo.queue(max_size=20).launch(share=True)
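
For context on where the five new slider values end up: Gradio's ChatInterface calls fn with the message and chat history first, then each additional_inputs value positionally, in the order listed. The hunks above skip the body of generate, but the file's imports (Iterator, Thread) and the surviving yield "".join(outputs) line match the usual TextIteratorStreamer streaming pattern. Below is a minimal sketch of a compatible signature and body under that assumption; everything not visible in the diff (the prompt assembly, the streamer settings) is an assumption, not this commit's code. The one hard requirement the diff does impose is that the parameter order match the slider order.

from collections.abc import Iterator
from threading import Thread

from transformers import TextIteratorStreamer  # assumed; the import hunks do not show it


def generate(
    message: str,
    chat_history: list[tuple[str, str]],  # assumed tuple-style history
    max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS,
    temperature: float = 0.6,
    top_p: float = 0.9,
    top_k: int = 50,
    repetition_penalty: float = 1.2,
) -> Iterator[str]:
    # Defaults mirror the slider values; ChatInterface passes the sliders
    # positionally in the order they appear in additional_inputs.
    conversation = []
    for user_msg, assistant_msg in chat_history:
        conversation.append({"role": "user", "content": user_msg})
        conversation.append({"role": "assistant", "content": assistant_msg})
    conversation.append({"role": "user", "content": message})

    input_ids = tokenizer.apply_chat_template(
        conversation, add_generation_prompt=True, return_tensors="pt"
    )
    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]  # keep the newest tokens
    input_ids = input_ids.to(model.device)

    # Run generation on a background thread so tokens can be streamed
    # back to the UI as they are produced.
    streamer = TextIteratorStreamer(
        tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True
    )
    Thread(
        target=model.generate,
        kwargs=dict(
            input_ids=input_ids,
            streamer=streamer,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
            repetition_penalty=repetition_penalty,
        ),
    ).start()

    # Accumulate streamed fragments and re-yield the running text.
    outputs = []
    for text in streamer:
        outputs.append(text)
        yield "".join(outputs)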
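On the dropped keyword arguments: as the old lines read (no comma after trust_remote_code=True), the two from_pretrained calls would not even parse, so this commit fixes a syntax error as well as a deprecation; recent transformers releases expect token in place of use_auth_token. For what it's worth, Mistral's architecture is natively supported in transformers, so trust_remote_code=True should not be needed here at all. If the gated Mistral weights require authentication, a sketch of the current idiom follows, with HF_TOKEN as an assumed environment-variable name rather than anything this commit uses:

import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

hf_token = os.getenv("HF_TOKEN")  # assumed variable name; any token source works

model_id = "mistralai/Mistral-7B-Instruct-v0.3"
tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    token=hf_token,  # replaces the deprecated use_auth_token=True
)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="auto",
    token=hf_token,
)

As before, demo.queue(max_size=20) caps the number of pending requests and launch(share=True) exposes the app through a temporary public gradio.live link; neither behavior changes in this commit.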