# myr1-2 / app.py

import gradio as gr
import spaces
import torch
from transformers import (
    AutoConfig,
    AutoTokenizer,
    AutoModelForCausalLM,
    pipeline,
)


# Load config, tokenizer, and model from wuhp/myr1 once at startup.
# (On ZeroGPU, CUDA is only available inside functions decorated with
# @spaces.GPU, so the decorator goes on the inference function below,
# not on this loader.)
def load_pipeline():
    config = AutoConfig.from_pretrained(
        "wuhp/myr1", subfolder="myr1", trust_remote_code=True
    )
    tokenizer = AutoTokenizer.from_pretrained(
        "wuhp/myr1", subfolder="myr1", trust_remote_code=True
    )
    model = AutoModelForCausalLM.from_pretrained(
        "wuhp/myr1",
        subfolder="myr1",
        config=config,
        torch_dtype=torch.float16,  # half precision
        device_map="auto",
        trust_remote_code=True,
    )
    # optional: load a generation config if the repo ships generation_config.json
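    # A minimal sketch of that optional step, assuming a generation_config.json
    # actually exists under the "myr1" subfolder (uncomment to use):
    #
    # from transformers import GenerationConfig
    # gen_config = GenerationConfig.from_pretrained("wuhp/myr1", subfolder="myr1")
    # model.generation_config = gen_config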
    text_pipeline = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
    )
    return text_pipeline


# We'll load it once and store globally
text_pipeline = load_pipeline()

# 1) Decorate your GPU-dependent function(s): inference is what needs the GPU,
#    so the @spaces.GPU decorator sits on predict rather than on the loader.
@spaces.GPU(duration=60)  # default is 60 s; increase if generation needs longer
def predict(prompt, max_new_tokens=64):
    outputs = text_pipeline(
        prompt, max_new_tokens=int(max_new_tokens), do_sample=True, temperature=0.7
    )
    return outputs[0]["generated_text"]
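
# Hypothetical smoke test (the prompt string is just an example); keep it
# commented out in the deployed Space:
# if __name__ == "__main__":
#     print(predict("Write a haiku about GPUs.", max_new_tokens=32))
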
# 2) Build your Gradio app
with gr.Blocks() as demo:
    gr.Markdown("## My LLM Inference (ZeroGPU)")

    prompt = gr.Textbox(label="Prompt")
    max_nt = gr.Slider(1, 200, value=64, step=1, label="Max New Tokens")
    output = gr.Textbox(label="Generated Text")

    btn = gr.Button("Generate")
    btn.click(fn=predict, inputs=[prompt, max_nt], outputs=output)

demo.launch()