# Spaces: Sleeping  (page-scrape residue; not part of the program)
import shlex

import gradio as gr
import requests
import sky
def deploy_vllm_on_sky(model_path, gpu_type, cpus, memory, cloud_provider, region, disk_size, disk_type):
    """Launch a single-GPU SkyPilot cluster that serves a model with vLLM.

    Args:
        model_path: Model identifier or path handed to ``vllm serve``.
        gpu_type: Accelerator name (e.g. ``"V100"``); exactly one is requested.
        cpus: Value passed to ``sky.Resources(cpus=...)``.
        memory: Value passed to ``sky.Resources(memory=...)``.
        cloud_provider: ``"AWS"``, ``"GCP"`` or ``"Azure"`` (case-insensitive).
        region: Cloud region; blank or ``None`` leaves the choice to SkyPilot.
        disk_size: Disk size; converted to ``int`` before use.
        disk_type: ``"standard"`` or ``"ssd"`` (case-insensitive).

    Returns:
        A human-readable status message containing the cluster name.

    Raises:
        ValueError: If ``model_path`` is blank, or if ``cloud_provider`` or
            ``disk_type`` is not one of the supported values.
    """
    cluster_name = "vllm-cluster"
    serve_port = 8080

    # SkyPilot exposes one class per cloud at package level (sky.AWS, ...);
    # there is no generic sky.Cloud(provider=...) constructor.
    cloud_classes = {"aws": "AWS", "gcp": "GCP", "azure": "Azure"}
    # NOTE(review): SkyPilot has disk *tiers*, not disk types. This mapping
    # approximates the UI labels -- confirm it matches the intended hardware.
    disk_tiers = {"standard": "low", "ssd": "high"}

    # Validate everything up front so bad form input fails fast, before any
    # cloud resources are touched.
    model_path = str(model_path or "").strip()
    if not model_path:
        raise ValueError("model_path must not be empty")

    cloud_key = str(cloud_provider or "").strip().lower()
    if cloud_key not in cloud_classes:
        raise ValueError(f"Unsupported cloud provider: {cloud_provider!r}")

    disk_key = str(disk_type or "").strip().lower()
    if disk_key not in disk_tiers:
        raise ValueError(f"Unsupported disk type: {disk_type!r}")

    # The region textbox has no default value, so it may arrive empty.
    region = str(region or "").strip() or None

    task = sky.Task(
        name="vllm_serving",
        setup="pip install vllm",
        # `vllm serve` takes the model as a positional argument. The value is
        # user-supplied and ends up in a shell command, so it must be quoted.
        run=f"vllm serve {shlex.quote(model_path)} --port {serve_port}",
        envs={"MODEL_PATH": model_path},
        workdir=".",
    )
    task.set_resources(
        sky.Resources(
            cloud=getattr(sky, cloud_classes[cloud_key])(),
            region=region,
            accelerators=f"{gpu_type}:1",
            cpus=cpus,
            memory=memory,
            # Sliders can deliver floats; the disk size must be an integer.
            disk_size=int(disk_size),
            disk_tier=disk_tiers[disk_key],
            # Ports are opened through Resources, not through Task.
            ports=serve_port,
        )
    )

    # NOTE(review): on SkyPilot releases with the client/server API,
    # sky.launch returns a request ID instead of blocking -- confirm whether
    # the caller should wait on it before reporting success.
    sky.launch(task, cluster_name=cluster_name)
    return f"VLLM model deployed on SkyPilot with cluster name: {cluster_name}"
def vllm_inference(prompt, cluster_name):
    """Send ``prompt`` to the vLLM server on a cluster and return its completion.

    The server started by ``vllm serve`` speaks the OpenAI-compatible HTTP
    API, so the served model name is looked up via ``/v1/models`` and the
    prompt is posted to ``/v1/completions``.

    Args:
        prompt: Text to complete.
        cluster_name: Name of the cluster hosting the vLLM server on port 8080.

    Returns:
        The generated text of the first completion choice.

    Raises:
        ValueError: If ``prompt`` or ``cluster_name`` is blank.
        requests.RequestException: On connection failure, timeout, or a
            non-2xx HTTP response.
    """
    request_timeout = 60  # seconds; without it a dead server hangs the UI
    max_new_tokens = 256  # the completions endpoint otherwise stops at 16

    if not prompt or not str(prompt).strip():
        raise ValueError("prompt must not be empty")
    cluster_name = str(cluster_name or "").strip()
    if not cluster_name:
        raise ValueError("cluster_name must not be empty")

    # NOTE(review): `sky.get_cluster_ip` does not appear in SkyPilot's
    # documented Python API; the head IP is normally read from the records
    # returned by `sky.status(cluster_names=[...])`. Confirm against the
    # installed SkyPilot version before relying on this call.
    cluster_ip = sky.get_cluster_ip(cluster_name)
    base_url = f"http://{cluster_ip}:8080"

    # The completions endpoint requires the served model's name, which this
    # function is not given, so ask the server.
    models_response = requests.get(f"{base_url}/v1/models", timeout=request_timeout)
    models_response.raise_for_status()
    model_id = models_response.json()["data"][0]["id"]

    response = requests.post(
        f"{base_url}/v1/completions",
        json={"model": model_id, "prompt": prompt, "max_tokens": max_new_tokens},
        timeout=request_timeout,
    )
    response.raise_for_status()
    return response.json()["choices"][0]["text"]
# Form that forwards a prompt and a cluster name to `vllm_inference` and
# shows the returned text.
vllm_inference_interface = gr.Interface(
    title="VLLM Inference",
    description="Enter a prompt to generate text using VLLM served on a SkyPilot-managed cloud instance.",
    fn=vllm_inference,
    # Input order must match the positional parameters of `vllm_inference`.
    inputs=[
        gr.Textbox(label="Input Prompt", lines=5),
        gr.Textbox(
            label="Cluster Name",
            placeholder="Enter the cluster name where VLLM is deployed",
        ),
    ],
    outputs="text",
)
# Form that collects deployment settings and passes them to
# `deploy_vllm_on_sky`.
sky_pilot_interface = gr.Interface(
    title="Deploy VLLM on SkyPilot",
    description="Configure and deploy a VLLM model on a SkyPilot-managed cloud instance with full parameter customization.",
    fn=deploy_vllm_on_sky,
    # Input order must match the positional parameters of
    # `deploy_vllm_on_sky`.
    inputs=[
        # What to serve and on which accelerator.
        gr.Textbox(label="Model Path", placeholder="EleutherAI/gpt-neo-2.7B"),
        gr.Dropdown(label="GPU Type", value="V100", choices=["V100", "P100", "T4"]),
        # Node sizing.
        gr.Slider(label="CPUs", value=4, minimum=1, maximum=16),
        gr.Slider(label="Memory (GB)", value=16, minimum=4, maximum=64),
        # Placement.
        gr.Dropdown(label="Cloud Provider", value="AWS", choices=["AWS", "GCP", "Azure"]),
        gr.Textbox(label="Region", placeholder="us-west-2"),
        # Storage.
        gr.Slider(label="Disk Size (GB)", value=100, minimum=20, maximum=1000),
        gr.Dropdown(label="Disk Type", value="ssd", choices=["standard", "ssd"]),
    ],
    outputs="text",
)
if __name__ == "__main__":
    # Show the two forms side by side: inference on the left, deployment on
    # the right, each in its own column of a single row.
    with gr.Blocks() as demo:
        with gr.Row():
            for panel in (vllm_inference_interface, sky_pilot_interface):
                with gr.Column():
                    panel.render()
    demo.launch()