harsh-manvar committed on
Commit
f71f3be
·
verified ·
1 Parent(s): 8f95bbc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -4
app.py CHANGED
@@ -5,7 +5,9 @@ from vllm import LLM, SamplingParams
5
  # Load the model and tokenizer from Hugging Face
6
  model_name = "facebook/opt-125m"
7
  tokenizer = AutoTokenizer.from_pretrained(model_name)
8
- vllm_model = LLM(model="facebook/opt-125m")
 
 
9
 
10
  def generate_response(prompt, max_tokens, temperature, top_p):
11
  # Tokenize the prompt
@@ -27,8 +29,8 @@ def generate_response(prompt, max_tokens, temperature, top_p):
27
 
28
  # Gradio UI
29
  with gr.Blocks() as demo:
30
- gr.Markdown("# 🚀 Hugging Face Integration with vLLM")
31
- gr.Markdown("Generate text using the vLLM integration with Hugging Face models.")
32
 
33
  with gr.Row():
34
  with gr.Column():
@@ -74,4 +76,4 @@ with gr.Blocks() as demo:
74
  )
75
 
76
  # Launch the app
77
- demo.launch()
 
5
  # Load the model and tokenizer from Hugging Face
6
  model_name = "facebook/opt-125m"
7
  tokenizer = AutoTokenizer.from_pretrained(model_name)
8
+
9
+ # Initialize vLLM with CPU-only configuration
10
+ vllm_model = LLM(model=model_name, tensor_parallel_size=1, device="cpu")
11
 
12
  def generate_response(prompt, max_tokens, temperature, top_p):
13
  # Tokenize the prompt
 
29
 
30
  # Gradio UI
31
  with gr.Blocks() as demo:
32
+ gr.Markdown("# 🚀 Hugging Face Integration with vLLM (CPU)")
33
+ gr.Markdown("Generate text using the vLLM integration with Hugging Face models on CPU.")
34
 
35
  with gr.Row():
36
  with gr.Column():
 
76
  )
77
 
78
  # Launch the app
79
+ demo.launch()