Spaces:

CreitinGameplays
/

ConvAIChat

Runtime error

App Files Community

CreitinGameplays committed on Mar 18

Commit

d9b9a34

verified ·

1 Parent(s): eaf9717

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -10

app.py CHANGED Viewed

@@ -8,15 +8,15 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStream
 MAX_MAX_NEW_TOKENS = 4096
 DEFAULT_MAX_NEW_TOKENS = 1024
-MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "8000"))
 DESCRIPTION = """\
-# ConvAI 9b v2 Chat
 """
 # Load model with appropriate device configuration
 def load_model():
-    model_id = "CreitinGameplays/Llama-3.2-3B-Instruct-R1-v1"
     device = "cuda" if torch.cuda.is_available() else "cpu"
     # If using CPU, load in 32-bit to avoid potential issues with 16-bit operations
@@ -39,7 +39,7 @@ def load_model():
 model, tokenizer, device = load_model()
-system_prompt_text = "You are Ricardinho."
 def generate(
     message: str,
@@ -47,9 +47,9 @@ def generate(
     system_prompt: str = system_prompt_text,
     max_new_tokens: int = 1024,
     temperature: float = 0.6,
-    top_p: float = 1.0,
-    top_k: int = 0,
-    repetition_penalty: float = 1.2,
 ) -> Iterator[str]:
     conversation = []
     if system_prompt:
@@ -107,21 +107,21 @@ chat_interface = gr.ChatInterface(
             minimum=0.05,
             maximum=1.0,
             step=0.05,
-            value=1.0,
         ),
         gr.Slider(
             label="Top-k",
             minimum=0,
             maximum=1000,
             step=1,
-            value=0,
         ),
         gr.Slider(
             label="Repetition penalty",
             minimum=1.0,
             maximum=2.0,
             step=0.05,
-            value=1.2,
         ),
     ],
     stop_btn=None,

 MAX_MAX_NEW_TOKENS = 4096
 DEFAULT_MAX_NEW_TOKENS = 1024
+MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "8192"))
 DESCRIPTION = """\
+# Chat
 """
 # Load model with appropriate device configuration
 def load_model():
+    model_id = "CreitinGameplays/Llama-3.1-8B-R1-v0.1"
     device = "cuda" if torch.cuda.is_available() else "cpu"
     # If using CPU, load in 32-bit to avoid potential issues with 16-bit operations
 model, tokenizer, device = load_model()
+system_prompt_text = "You are a helpful AI assistant."
 def generate(
     message: str,
     system_prompt: str = system_prompt_text,
     max_new_tokens: int = 1024,
     temperature: float = 0.6,
+    top_p: float = 0.9,
+    top_k: int = 50,
+    repetition_penalty: float = 1.1,
 ) -> Iterator[str]:
     conversation = []
     if system_prompt:
             minimum=0.05,
             maximum=1.0,
             step=0.05,
+            value=0.9,
         ),
         gr.Slider(
             label="Top-k",
             minimum=0,
             maximum=1000,
             step=1,
+            value=50,
         ),
         gr.Slider(
             label="Repetition penalty",
             minimum=1.0,
             maximum=2.0,
             step=0.05,
+            value=1.1,
         ),
     ],
     stop_btn=None,