Update app.py
app.py
CHANGED
@@ -19,7 +19,7 @@ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
-model_id = "avemio/GRAG-
+model_id = "avemio/GRAG-PHI-3.5-MINI-4B-MERGED-HESSIAN-AI"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
@@ -52,7 +52,7 @@ def generate(
         gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
     input_ids = input_ids.to(device)
 
-    im_end_token_id = tokenizer.convert_tokens_to_ids('<|im_end|>')
+    im_end_token_id = tokenizer.convert_tokens_to_ids('<|end|>')
 
     # Set up the streamer with modified timeout and parameters
     streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
@@ -116,7 +116,7 @@ chat_interface = gr.ChatInterface(
             minimum=1.0,
             maximum=2.0,
             step=0.05,
-            value=0.
+            value=0.5,
         ),
     ],
     stop_btn=None,
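For context: swapping the stop token to '<|end|>' matches the new Phi-3.5-based checkpoint, whose chat template closes each turn with <|end|> rather than the ChatML-style <|im_end|>. The token id is only useful if it is actually passed to generation as a stop token. Below is a minimal sketch of that wiring, assuming the app follows the usual threaded TextIteratorStreamer pattern; the generate() call itself is outside this diff, so the eos_token_id kwarg, the sample prompt, and max_new_tokens here are assumptions, not the app's exact code.

import torch
from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model_id = "avemio/GRAG-PHI-3.5-MINI-4B-MERGED-HESSIAN-AI"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16).to(device)

# Phi-3.5 chat templates end each turn with <|end|>, so its id replaces
# the ChatML-style <|im_end|> used by the previous checkpoint.
im_end_token_id = tokenizer.convert_tokens_to_ids("<|end|>")

# Hypothetical single-turn prompt for illustration.
input_ids = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Hallo, wer bist du?"}],
    add_generation_prompt=True,
    return_tensors="pt",
).to(device)

streamer = TextIteratorStreamer(
    tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True
)

# generate() runs in a background thread while the streamer yields text chunks.
Thread(
    target=model.generate,
    kwargs=dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=256,
        eos_token_id=im_end_token_id,  # stop at <|end|> (assumed wiring)
    ),
).start()

for chunk in streamer:
    print(chunk, end="", flush=True)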