harsh-manvar committed on
Commit
371bc95
·
verified ·
1 Parent(s): c9b9eba

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -2
app.py CHANGED
@@ -5,7 +5,7 @@ from vllm import SamplingParams, LLM
5
  # Load the model and tokenizer from Hugging Face
6
  model_name = "Qwen/Qwen2-7B"
7
  tokenizer = AutoTokenizer.from_pretrained(model_name)
8
- engine = LLM(model="Qwen/Qwen2-7B")
9
 
10
  def generate_response(prompt, max_tokens, temperature, top_p):
11
  # Tokenize the prompt
@@ -19,7 +19,7 @@ def generate_response(prompt, max_tokens, temperature, top_p):
19
  )
20
 
21
  # Generate text using vLLM
22
- output = engine.generate(inputs["input_ids"], sampling_params)
23
 
24
  # Decode the generated tokens to text
25
  generated_text = tokenizer.decode(output[0]["token_ids"], skip_special_tokens=True)
 
5
  # Load the model and tokenizer from Hugging Face
6
  model_name = "Qwen/Qwen2-7B"
7
  tokenizer = AutoTokenizer.from_pretrained(model_name)
8
+ vllm_model = LLM(model="Qwen/Qwen2-7B")
9
 
10
  def generate_response(prompt, max_tokens, temperature, top_p):
11
  # Tokenize the prompt
 
19
  )
20
 
21
  # Generate text using vLLM
22
+ output = vllm_model.generate(inputs["input_ids"], sampling_params)
23
 
24
  # Decode the generated tokens to text
25
  generated_text = tokenizer.decode(output[0]["token_ids"], skip_special_tokens=True)