Update app.py
app.py  CHANGED
@@ -1,6 +1,7 @@
 import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 import torch
+import spaces
 
 # Define quantization configuration
 quantization_config = BitsAndBytesConfig(
@@ -18,6 +19,7 @@ tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(model_name,
                                              quantization_config=quantization_config,
                                              device_map="auto")
+model.eval()
 
 def format_history(msg: str, history: list[list[str, str]], system_prompt: str):
     chat_history = system_prompt
@@ -26,6 +28,7 @@ def format_history(msg: str, history: list[list[str, str]], system_prompt: str):
     chat_history += f"\nUser: {msg}\nAssistant:"
     return chat_history
 
+@spaces.GPU(duration=90)
 def generate_response(msg: str, history: list[list[str, str]], system_prompt: str):
     chat_history = format_history(msg, history, system_prompt)
 
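
Taken together, the three added lines adapt the app to Hugging Face ZeroGPU hardware: "import spaces" pulls in the Spaces helper package, "model.eval()" puts the quantized model into inference mode, and "@spaces.GPU(duration=90)" asks ZeroGPU to attach a GPU to each generate_response call for up to 90 seconds. The body of generate_response is cut off in this hunk; below is a minimal sketch of how such a decorated, streaming generation function is often completed. The TextIteratorStreamer usage, max_new_tokens value, and streaming loop are illustrative assumptions, not the Space's actual code, and it relies on the module-level tokenizer, model, and format_history defined earlier in app.py.

    # Illustrative sketch only -- not the Space's actual implementation.
    from threading import Thread
    from transformers import TextIteratorStreamer
    import spaces

    @spaces.GPU(duration=90)  # ZeroGPU attaches a GPU for up to 90 s per call
    def generate_response(msg, history, system_prompt):
        chat_history = format_history(msg, history, system_prompt)
        inputs = tokenizer(chat_history, return_tensors="pt").to(model.device)
        # Stream tokens back to Gradio as they are generated (assumed settings).
        streamer = TextIteratorStreamer(tokenizer, skip_prompt=True,
                                        skip_special_tokens=True)
        Thread(target=model.generate,
               kwargs=dict(**inputs, streamer=streamer, max_new_tokens=512)).start()
        partial = ""
        for new_text in streamer:
            partial += new_text
            yield partial  # gr.ChatInterface renders the growing reply

Because the function yields partial strings, a Gradio chat wrapper such as gr.ChatInterface(generate_response, ...) can display the response incrementally while the GPU slot is held.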