CreitinGameplays committed
Commit d9b9a34 · verified · 1 Parent(s): eaf9717

Update app.py

Files changed (1):
  1. app.py +10 -10
app.py CHANGED
@@ -8,15 +8,15 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 
 MAX_MAX_NEW_TOKENS = 4096
 DEFAULT_MAX_NEW_TOKENS = 1024
-MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "8000"))
+MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "8192"))
 
 DESCRIPTION = """\
-# ConvAI 9b v2 Chat
+# Chat
 """
 
 # Load model with appropriate device configuration
 def load_model():
-    model_id = "CreitinGameplays/Llama-3.2-3B-Instruct-R1-v1"
+    model_id = "CreitinGameplays/Llama-3.1-8B-R1-v0.1"
     device = "cuda" if torch.cuda.is_available() else "cpu"
 
     # If using CPU, load in 32-bit to avoid potential issues with 16-bit operations
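
Note: the hunk ends before the body of `load_model()`, so the dtype handling that the comment describes is not shown. Below is a minimal sketch of what such a loader typically looks like, assuming the standard `transformers` pattern; the `torch_dtype` choice, `model.eval()` call, and return shape are assumptions based on the shown context, not code from this commit.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

def load_model():
    # Hedged sketch: everything past the lines shown in the diff is assumed
    model_id = "CreitinGameplays/Llama-3.1-8B-R1-v0.1"  # id as of this commit
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # If using CPU, load in 32-bit to avoid potential issues with 16-bit operations
    dtype = torch.float16 if device == "cuda" else torch.float32

    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=dtype)
    model.to(device)
    model.eval()
    return model, tokenizer, device
```

The `MAX_INPUT_TOKEN_LENGTH` bump from 8000 to 8192 aligns the default with the usual power-of-two context budget; since it is read through `os.getenv`, it can still be overridden at launch, e.g. `MAX_INPUT_TOKEN_LENGTH=4096 python app.py`.
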
@@ -39,7 +39,7 @@ def load_model():
 
 model, tokenizer, device = load_model()
 
-system_prompt_text = "You are Ricardinho."
+system_prompt_text = "You are a helpful AI assistant."
 
 def generate(
     message: str,
@@ -47,9 +47,9 @@ def generate(
     system_prompt: str = system_prompt_text,
     max_new_tokens: int = 1024,
     temperature: float = 0.6,
-    top_p: float = 1.0,
-    top_k: int = 0,
-    repetition_penalty: float = 1.2,
+    top_p: float = 0.9,
+    top_k: int = 50,
+    repetition_penalty: float = 1.1,
 ) -> Iterator[str]:
     conversation = []
     if system_prompt:
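
The new defaults replace values that effectively disabled the sampling filters: `top_p=1.0` turns nucleus sampling off and `top_k=0` disables top-k, while 0.9 and 50 are conventional settings (50 is the `transformers` generation default for `top_k`), and the repetition penalty is relaxed from 1.2 to 1.1. The body of `generate()` is outside the diff context; here is a sketch of how these parameters typically reach a streamed `model.generate()` call in this kind of Gradio app, assuming the common `TextIteratorStreamer` plus background-thread pattern (the truncation step and the exact kwargs are assumptions).

```python
from threading import Thread
from typing import Iterator

from transformers import TextIteratorStreamer

def generate(
    message: str,
    chat_history: list[tuple[str, str]],
    system_prompt: str = system_prompt_text,
    max_new_tokens: int = 1024,
    temperature: float = 0.6,
    top_p: float = 0.9,
    top_k: int = 50,
    repetition_penalty: float = 1.1,
) -> Iterator[str]:
    # Rebuild the conversation in chat-template form (assumed structure)
    conversation = []
    if system_prompt:
        conversation.append({"role": "system", "content": system_prompt})
    for user, assistant in chat_history:
        conversation.append({"role": "user", "content": user})
        conversation.append({"role": "assistant", "content": assistant})
    conversation.append({"role": "user", "content": message})

    input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt")
    # Enforce the (now 8192-token) input budget, keeping the most recent tokens
    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
    input_ids = input_ids.to(model.device)

    # Stream decoded text back to the UI while generation runs in a thread
    streamer = TextIteratorStreamer(
        tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True
    )
    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,                            # 0.9 enables nucleus sampling (was 1.0, off)
        top_k=top_k,                            # 50 is the usual default (was 0, off)
        repetition_penalty=repetition_penalty,  # relaxed from 1.2 to 1.1
    )
    Thread(target=model.generate, kwargs=generate_kwargs).start()

    outputs = []
    for text in streamer:
        outputs.append(text)
        yield "".join(outputs)
```
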
@@ -107,21 +107,21 @@ chat_interface = gr.ChatInterface(
             minimum=0.05,
             maximum=1.0,
             step=0.05,
-            value=1.0,
+            value=0.9,
         ),
         gr.Slider(
             label="Top-k",
             minimum=0,
             maximum=1000,
             step=1,
-            value=0,
+            value=50,
         ),
         gr.Slider(
             label="Repetition penalty",
             minimum=1.0,
             maximum=2.0,
             step=0.05,
-            value=1.2,
+            value=1.1,
         ),
     ],
     stop_btn=None,
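
`gr.ChatInterface` passes each entry of `additional_inputs` to `fn` positionally after `(message, history)`, so the slider order must mirror the `generate()` signature, and the `value=` changes above simply keep the UI defaults in sync with the new function defaults. A sketch of that wiring follows; the system-prompt textbox, the max-tokens and temperature sliders, and the "Top-p" label are assumed from the standard template, while the three changed values come from this diff.

```python
import gradio as gr

chat_interface = gr.ChatInterface(
    fn=generate,
    additional_inputs=[
        # Assumed surrounding inputs; only the last three sliders appear in the diff
        gr.Textbox(label="System prompt", value=system_prompt_text, lines=4),
        gr.Slider(label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS,
                  step=1, value=DEFAULT_MAX_NEW_TOKENS),
        gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.6),
        gr.Slider(label="Top-p (nucleus sampling)", minimum=0.05, maximum=1.0,
                  step=0.05, value=0.9),
        gr.Slider(label="Top-k", minimum=0, maximum=1000, step=1, value=50),
        gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0,
                  step=0.05, value=1.1),
    ],
    stop_btn=None,
)
```
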
 