Update app.py
app.py
CHANGED
@@ -19,7 +19,7 @@ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
-model_id = "avemio/GRAG-
+model_id = "avemio/GRAG-PHI-3.5-MINI-4B-MERGED-HESSIAN-AI"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
@@ -52,7 +52,7 @@ def generate(
         gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
     input_ids = input_ids.to(device)
 
-    im_end_token_id = tokenizer.convert_tokens_to_ids('<|im_end|>')
+    im_end_token_id = tokenizer.convert_tokens_to_ids('<|end|>')
 
     # Set up the streamer with modified timeout and parameters
     streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
@@ -116,7 +116,7 @@ chat_interface = gr.ChatInterface(
             minimum=1.0,
             maximum=2.0,
             step=0.05,
-            value=0.
+            value=0.5,
         ),
     ],
     stop_btn=None,
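For context: swapping the stop token to '<|end|>' matches the new Phi-3.5-based checkpoint, whose chat template closes each turn with <|end|> rather than the ChatML-style <|im_end|>. The token id is only useful if it is actually passed to generation as a stop token. Below is a minimal sketch of that wiring, assuming the app follows the usual threaded TextIteratorStreamer pattern; the generate() call itself is outside this diff, so the eos_token_id kwarg, the sample prompt, and max_new_tokens here are assumptions, not the app's exact code.

import torch
from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model_id = "avemio/GRAG-PHI-3.5-MINI-4B-MERGED-HESSIAN-AI"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16).to(device)

# Phi-3.5 chat templates end each turn with <|end|>, so its id replaces
# the ChatML-style <|im_end|> used by the previous checkpoint.
im_end_token_id = tokenizer.convert_tokens_to_ids("<|end|>")

# Hypothetical single-turn prompt for illustration.
input_ids = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Hallo, wer bist du?"}],
    add_generation_prompt=True,
    return_tensors="pt",
).to(device)

streamer = TextIteratorStreamer(
    tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True
)

# generate() runs in a background thread while the streamer yields text chunks.
Thread(
    target=model.generate,
    kwargs=dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=256,
        eos_token_id=im_end_token_id,  # stop at <|end|> (assumed wiring)
    ),
).start()

for chunk in streamer:
    print(chunk, end="", flush=True)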