Christoph Holthaus
committed on
Commit
·
ce6efbb
1
Parent(s):
9669900
update texts
Browse files
app.py
CHANGED
@@ -12,8 +12,8 @@ llama_model_name = "TheBloke/dolphin-2.2.1-AshhLimaRP-Mistral-7B-GGUF"
|
|
12 |
print("! INITING DONE !")
|
13 |
|
14 |
# Preparing things to work
|
15 |
-
title = "Demo for 7B Models - Quantized"
|
16 |
-
|
17 |
Quantized to run in the free tier hosting.
|
18 |
Have a quick way to test models or share them with others without hassle.
|
19 |
It runs slow, as it's on cpu. Usable for basic tests.
|
@@ -35,7 +35,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStream
|
|
35 |
DESCRIPTION = "# Mistral-7B"
|
36 |
|
37 |
if torch.cuda.is_available():
|
38 |
-
DESCRIPTION += "\n<p>This space is
|
39 |
|
40 |
MAX_MAX_NEW_TOKENS = 2048
|
41 |
DEFAULT_MAX_NEW_TOKENS = 1024
|
@@ -142,7 +142,8 @@ chat_interface = gr.ChatInterface(
|
|
142 |
)
|
143 |
|
144 |
with gr.Blocks(css="style.css") as demo:
|
145 |
-
gr.Markdown(
|
|
|
146 |
gr.DuplicateButton(
|
147 |
value="Duplicate Space for private use",
|
148 |
elem_id="duplicate-button",
|
|
|
12 |
print("! INITING DONE !")
|
13 |
|
14 |
# Preparing things to work
|
15 |
+
title = "# Demo for 7B Models - Quantized"
|
16 |
+
descr = '''<h1>Demo for 7B Models - Quantized</h1>
|
17 |
Quantized to run in the free tier hosting.
|
18 |
Have a quick way to test models or share them with others without hassle.
|
19 |
It runs slow, as it's on cpu. Usable for basic tests.
|
|
|
35 |
DESCRIPTION = "# Mistral-7B"
|
36 |
|
37 |
if torch.cuda.is_available():
|
38 |
+
DESCRIPTION += "\n<p>This space is using CPU only. Use a different one if you want to go fast and use GPU. </p>"
|
39 |
|
40 |
MAX_MAX_NEW_TOKENS = 2048
|
41 |
DEFAULT_MAX_NEW_TOKENS = 1024
|
|
|
142 |
)
|
143 |
|
144 |
with gr.Blocks(css="style.css") as demo:
|
145 |
+
gr.Markdown(title)
|
146 |
+
gr.Markdown(descr)
|
147 |
gr.DuplicateButton(
|
148 |
value="Duplicate Space for private use",
|
149 |
elem_id="duplicate-button",
|