File size: 2,363 Bytes
0efa81e
1e819e6
ed53c37
0efa81e
1e819e6
8f95bbc
0efa81e
f71f3be
9dad4e7
 
0efa81e
 
 
9dad4e7
0efa81e
 
 
 
 
 
 
 
9dad4e7
 
0efa81e
 
9dad4e7
0efa81e
 
 
 
f71f3be
 
0efa81e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fccd59a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import gradio as gr
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams

# Hugging Face checkpoint served by this demo, plus its matching tokenizer.
model_name = "facebook/opt-125m"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Build the vLLM engine for the same checkpoint, targeting the CPU backend
# with a single tensor-parallel worker.
vllm_model = LLM(
    model=model_name,
    tensor_parallel_size=1,
    device="cpu",
)

def generate_response(prompt, max_tokens, temperature, top_p):
    """Generate a completion for *prompt* with the module-level vLLM engine.

    Args:
        prompt: Text to continue. ``None``, empty, or whitespace-only input
            returns an empty string without invoking the engine.
        max_tokens: Upper bound on the number of generated tokens. Coerced to
            ``int`` because UI sliders may deliver the value as a float.
        temperature: Sampling temperature forwarded to ``SamplingParams``.
        top_p: Nucleus-sampling probability mass forwarded to
            ``SamplingParams``.

    Returns:
        The text of the first completion produced for the prompt, or ``""``
        when there is nothing to generate from.
    """
    # Nothing to continue: skip the (expensive) engine call entirely.
    if not prompt or not prompt.strip():
        return ""

    sampling_params = SamplingParams(
        max_tokens=int(max_tokens),
        temperature=float(temperature),
        top_p=float(top_p),
    )

    # Hand vLLM the raw text and let it tokenize. The first positional
    # argument of ``LLM.generate`` is the prompt (or list of prompts); a flat
    # list of token ids passed there is not interpreted as one prompt.
    request_outputs = vllm_model.generate(prompt, sampling_params)

    # One prompt in, one request out; take its first (only) completion.
    return request_outputs[0].outputs[0].text

# Gradio UI: generation controls on the left, generated text on the right.
with gr.Blocks() as demo:
    # The rocket emoji is written as an escape sequence so the heading cannot
    # be corrupted again if this file is re-saved with the wrong encoding.
    gr.Markdown("# \U0001F680 Hugging Face Integration with vLLM (CPU)")
    gr.Markdown("Generate text using the vLLM integration with Hugging Face models on CPU.")

    with gr.Row():
        # Left column: prompt and sampling controls.
        with gr.Column():
            prompt_input = gr.Textbox(
                label="Prompt",
                placeholder="Enter your prompt here...",
                lines=3,
            )
            max_tokens = gr.Slider(
                label="Max Tokens",
                minimum=10,
                maximum=500,
                value=100,
                step=10,
            )
            temperature = gr.Slider(
                label="Temperature",
                minimum=0.1,
                maximum=1.0,
                value=0.7,
                step=0.1,
            )
            top_p = gr.Slider(
                label="Top P",
                minimum=0.1,
                maximum=1.0,
                value=0.9,
                step=0.1,
            )
            submit_button = gr.Button("Generate")

        # Right column: read-only box that receives the generated text.
        with gr.Column():
            output_text = gr.Textbox(
                label="Generated Text",
                lines=10,
                interactive=False,
            )

    # Clicking the button calls generate_response with the four control
    # values (in this order) and writes its return value to the output box.
    submit_button.click(
        generate_response,
        inputs=[prompt_input, max_tokens, temperature, top_p],
        outputs=output_text,
    )

# Launch the app
demo.launch()