Update app.py
app.py CHANGED
@@ -63,25 +63,27 @@ def generate_response(user_message, history):
     with torch.no_grad():
         outputs = veri_model.generate(
             **inputs,
-            max_new_tokens=
-            temperature=0.
+            max_new_tokens=20000,
+            temperature=0.6,
             top_p=0.95,
             do_sample=True,
-
+            frequency_penalty = 0,
+            presence_penalty = 0
+            # top_k=50, # Top-k sampling for efficiency
             # pad_token_id=veri_tokenizer.eos_token_id,
             # eos_token_id=veri_tokenizer.eos_token_id,
-
-
-
-
-
-
+            # use_cache=True, # Enable KV caching for faster generation
+            # repetition_penalty=1.1, # Reduce repetition
+            # length_penalty=1.0,
+            # early_stopping=True, # Stop early when appropriate
+            # num_beams=1, # Greedy search for speed
+            # pad_token_id=veri_tokenizer.eos_token_id
         )

     response = veri_tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)

     # Truncate at CODE END to remove repetitive content
-    response = truncate_at_code_end(response)
+    # response = truncate_at_code_end(response)


     if torch.cuda.is_available():
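Note on the added kwargs: frequency_penalty and presence_penalty are OpenAI-API-style sampling parameters, not arguments that Hugging Face transformers' model.generate() accepts; generate() validates unknown keyword arguments and will typically raise a ValueError about unused model kwargs when they are passed. The closest supported knob in transformers is repetition_penalty. A minimal sketch of the same call using only supported arguments follows; it assumes veri_model and veri_tokenizer are an already-loaded causal LM and its tokenizer, and the helper name is illustrative, not part of the committed code.

import torch

def generate_with_supported_kwargs(veri_model, veri_tokenizer, prompt):
    # Tokenize and move inputs onto the model's device
    inputs = veri_tokenizer(prompt, return_tensors="pt").to(veri_model.device)
    with torch.no_grad():
        outputs = veri_model.generate(
            **inputs,
            max_new_tokens=20000,
            temperature=0.6,
            top_p=0.95,
            do_sample=True,
            repetition_penalty=1.0,  # 1.0 = no penalty; transformers' stand-in for frequency/presence penalties
            pad_token_id=veri_tokenizer.eos_token_id,
        )
    # Decode only the newly generated tokens, as in the diff above
    return veri_tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)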