import gradio as gr
import spaces  # Import ZeroGPU's helper module
from transformers import pipeline
import torch

# Global generator variable; load lazily on first request so startup stays fast.
generator = None


def get_generator():
    global generator
    if generator is None:
        try:
            # If a GPU is available, load on GPU (device=0).
            # float16 halves GPT-J-6B's ~24 GB fp32 footprint so it fits
            # comfortably in GPU memory.
            if torch.cuda.is_available():
                generator = pipeline(
                    "text-generation",
                    model="EleutherAI/gpt-j-6B",
                    torch_dtype=torch.float16,
                    device=0,
                )
            else:
                generator = pipeline("text-generation", model="EleutherAI/gpt-j-6B", device=-1)
        except Exception as e:
            print("Error loading model on GPU, falling back to CPU:", e)
            generator = pipeline("text-generation", model="EleutherAI/gpt-j-6B", device=-1)
    return generator


@spaces.GPU  # ZeroGPU allocates a GPU only while this function runs.
def expand_prompt(prompt, num_variants=5, max_length=100):
    """
    Given a basic prompt, generate `num_variants` expanded prompts using GPT-J-6B.
    The GPU is only engaged during this function call.
    """
    gen = get_generator()
    outputs = gen(
        prompt,
        max_length=max_length,  # total length (prompt + completion) in tokens
        num_return_sequences=num_variants,
        do_sample=True,
    )
    expanded = [out["generated_text"].strip() for out in outputs]
    return "\n\n".join(expanded)


iface = gr.Interface(
    fn=expand_prompt,
    inputs=gr.Textbox(lines=2, placeholder="Enter your basic prompt here...", label="Basic Prompt"),
    outputs=gr.Textbox(lines=10, label="Expanded Prompts"),
    title="Prompt Expansion Generator",
    description=(
        "Enter a basic prompt and receive 5 creative, expanded prompt variants. "
        "This tool leverages the EleutherAI/gpt-j-6B model on an A100 GPU via ZeroGPU. "
        "The GPU is only allocated while a prompt is being processed, in line with "
        "ZeroGPU's on-demand allocation model. Copy the output for use with your "
        "downstream image-generation pipeline."
    ),
)

if __name__ == "__main__":
    iface.launch()
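
# ---------------------------------------------------------------------------
# Deployment notes (a minimal sketch; the package list is an assumption,
# adjust it to your Space): save this file as app.py and place a
# requirements.txt alongside it, e.g.
#
#     gradio
#     spaces
#     transformers
#     torch
#
# For a local smoke test, note that GPT-J-6B needs roughly 24 GB of RAM on
# CPU or about 12 GB of GPU memory in float16:
#
#     $ python app.py
#     # then open the printed local URL and submit a short prompt
# ---------------------------------------------------------------------------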