Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,11 +1,11 @@
|
|
1 |
import gradio as gr
|
2 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
3 |
-
from vllm import
|
4 |
|
5 |
# Load the model and tokenizer from Hugging Face
|
6 |
model_name = "Qwen/Qwen2-7B"
|
7 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
8 |
-
engine =
|
9 |
|
10 |
def generate_response(prompt, max_tokens, temperature, top_p):
|
11 |
# Tokenize the prompt
|
|
|
1 |
import gradio as gr
|
2 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
3 |
+
from vllm import SamplingParams, LLM
|
4 |
|
5 |
# Load the model and tokenizer from Hugging Face
|
6 |
model_name = "Qwen/Qwen2-7B"
|
7 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
8 |
+
# vLLM's LLM is constructed directly; it has no `from_pretrained` classmethod
# (that is the transformers API), so calling it raises AttributeError.
engine = LLM(model=model_name)
|
9 |
|
10 |
def generate_response(prompt, max_tokens, temperature, top_p):
|
11 |
# Tokenize the prompt
|