Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,11 +1,11 @@
|
|
1 |
import gradio as gr
|
2 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
3 |
-
from vllm import
|
4 |
|
5 |
# Load the model and tokenizer from Hugging Face
|
6 |
-
model_name = "Qwen/Qwen2-
|
7 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
8 |
-
vllm_model = LLM(model="Qwen/Qwen2-
|
9 |
|
10 |
def generate_response(prompt, max_tokens, temperature, top_p):
|
11 |
# Tokenize the prompt
|
|
|
1 |
import gradio as gr
|
2 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
3 |
+
from vllm import LLM, SamplingParams
|
4 |
|
5 |
# Load the model and tokenizer from Hugging Face
|
6 |
+
model_name = "Qwen/Qwen2-0.5B"
|
7 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
8 |
+
vllm_model = LLM(model="Qwen/Qwen2-0.5B")
|
9 |
|
10 |
def generate_response(prompt, max_tokens, temperature, top_p):
|
11 |
# Tokenize the prompt
|