Spaces:
Configuration error
Configuration error
Update app.py
Browse files
app.py
CHANGED
@@ -69,7 +69,6 @@ def load_model(model_name):
         # Only move to CUDA if it's not a quantized model
         if model_name not in quantized_models:
             model = model.to("cuda")
-            tokenizer = tokenizer.to("cuda")

         selected_model = model_name
     except Exception as e:
@@ -105,6 +104,9 @@ def interact(user_input, history, interaction_count):

         # Generate response using selected model
         input_ids = tokenizer(prompt, return_tensors='pt').input_ids
+        if model_name not in quantized_models:
+            input_ids.to("cuda")
+
         chat_history_ids = model.generate(input_ids, max_new_tokens=100, pad_token_id=tokenizer.eos_token_id, temperature=0.1)
         response = tokenizer.decode(chat_history_ids[:, input_ids.shape[-1]:][0], skip_special_tokens=True)