Spaces:

ZackBradshaw
/

vllm-gradio

Sleeping

App Files Files Community

vllm-gradio / app.py

ZackBradshaw

Upload folder using huggingface_hub

fd34675 verified about 1 year ago

raw

history blame

2.74 kB

	import gradio as gr
	import requests
	import sky

	def deploy_vllm_on_sky(model_path, gpu_type, cpus, memory, cloud_provider, region, disk_size, disk_type):
	task = sky.Task(
	name="vllm_serving",
	setup="pip install vllm",
	run=f"vllm serve --model_name_or_path {model_path} --port 8080",
	envs={"MODEL_PATH": model_path},
	workdir=".",
	ports=8080
	)

	task.set_resources(
	sky.Resources(
	cloud=sky.Cloud(provider=cloud_provider, region=region),
	accelerators=f"{gpu_type}:1",
	cpus=cpus,
	memory=memory,
	disk=sky.Disk(size=disk_size, type=disk_type)
	)
	)

	cluster = sky.Cluster(
	name="vllm-cluster",
	cloud=sky.Cloud(provider=cloud_provider, region=region)
	)

	sky.launch(task, cluster=cluster)
	return f"VLLM model deployed on SkyPilot with cluster name: {cluster.name}"

	def vllm_inference(prompt, cluster_name):
	# Implementing cluster IP retrieval logic based on cluster name
	cluster_ip = sky.get_cluster_ip(cluster_name)
	response = requests.post(f"http://{cluster_ip}:8080", json={"inputs": prompt})
	return response.json()["outputs"]

	vllm_inference_interface = gr.Interface(
	fn=vllm_inference,
	inputs=[
	gr.Textbox(lines=5, label="Input Prompt"),
	gr.Textbox(label="Cluster Name", placeholder="Enter the cluster name where VLLM is deployed")
	],
	outputs="text",
	title="VLLM Inference",
	description="Enter a prompt to generate text using VLLM served on a SkyPilot-managed cloud instance."
	)

	sky_pilot_interface = gr.Interface(
	fn=deploy_vllm_on_sky,
	inputs=[
	gr.Textbox(label="Model Path", placeholder="EleutherAI/gpt-neo-2.7B"),
	gr.Dropdown(label="GPU Type", choices=["V100", "P100", "T4"], value="V100"),
	gr.Slider(label="CPUs", minimum=1, maximum=16, value=4),
	gr.Slider(label="Memory (GB)", minimum=4, maximum=64, value=16),
	gr.Dropdown(label="Cloud Provider", choices=["AWS", "GCP", "Azure"], value="AWS"),
	gr.Textbox(label="Region", placeholder="us-west-2"),
	gr.Slider(label="Disk Size (GB)", minimum=20, maximum=1000, value=100),
	gr.Dropdown(label="Disk Type", choices=["standard", "ssd"], value="ssd")
	],
	outputs="text",
	title="Deploy VLLM on SkyPilot",
	description="Configure and deploy a VLLM model on a SkyPilot-managed cloud instance with full parameter customization."
	)
	if __name__ == "__main__":
	with gr.Blocks() as app:
	with gr.Row():
	with gr.Column():
	vllm_inference_interface.render()
	with gr.Column():
	sky_pilot_interface.render()
	app.launch()