Update app.py
app.py CHANGED
@@ -90,32 +90,34 @@ def generate_response(model_name, system_instruction, user_input):
     prompt = f"""### Instruction:
 {system_instruction}
 Remember to ALWAYS format your response as valid JSON.
-
 ### Input:
 {user_input}
-
 ### Response:
 {{""" # Note the opening curly brace to hint JSON response
 
-    inputs = model_manager.current_tokenizer([prompt], return_tensors="pt").to(model_manager.device)
-
-    # Generation configuration optimized for JSON output
-    meta_config = {
-        "do_sample": False,
-        "temperature": 0.0,
-        "max_new_tokens": 512,
-        "repetition_penalty": 1.1,
-        "use_cache": True,
-        "pad_token_id": model_manager.current_tokenizer.eos_token_id,
-        "eos_token_id": model_manager.current_tokenizer.eos_token_id
-    }
-    generation_config = GenerationConfig(**meta_config)
-
-    # Generate response
     try:
+        # Ensure inputs are on the correct device
+        inputs = model_manager.current_tokenizer([prompt], return_tensors="pt")
+        # Move input_ids and attention_mask to the same device as the model
+        inputs = {k: v.to(model_manager.device) for k, v in inputs.items()}
+
+        # Generation configuration optimized for JSON output
+        meta_config = {
+            "do_sample": False,
+            "temperature": 0.0,
+            "max_new_tokens": 512,
+            "repetition_penalty": 1.2,
+            "use_cache": True,
+            "pad_token_id": model_manager.current_tokenizer.eos_token_id,
+            "eos_token_id": model_manager.current_tokenizer.eos_token_id
+        }
+        generation_config = GenerationConfig(**meta_config)
+
+        # Generate response
         with torch.no_grad():
             outputs = model_manager.current_model.generate(
-
+                input_ids=inputs['input_ids'],
+                attention_mask=inputs['attention_mask'],
                 generation_config=generation_config
             )
         decoded_output = model_manager.current_tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]