import gradio as gr
import spaces  # ZeroGPU helper module
import torch
from transformers import pipeline

# Preload the text-generation model on CPU at startup.
# Model: EleutherAI/gpt-j-6B (https://huggingface.co/EleutherAI/gpt-j-6B)
# Loading on CPU (device=-1) means initialization finishes before the GUI is up.
generator = pipeline("text-generation", model="EleutherAI/gpt-j-6B", device=-1)

@spaces.GPU  # ZeroGPU allocates a GPU only for the duration of this call.
def expand_prompt(prompt, num_variants=5, max_length=100):
    """
    Given a basic prompt, generate `num_variants` expanded prompt variants.

    The model is moved to the GPU for generation and back to the CPU
    afterwards, so the Space holds the GPU only while generating.
    """
    # Move the model to the GPU and point the pipeline at it. Updating
    # `generator.device` as well keeps input tensors on the same device
    # as the model; otherwise the pipeline would feed CPU tensors to a
    # CUDA model and raise a device-mismatch error.
    generator.model.to("cuda")
    generator.device = torch.device("cuda")
    outputs = generator(
        prompt,
        max_length=max_length,
        num_return_sequences=num_variants,
        do_sample=True,
    )
    # Move the model back to CPU so the GPU can be released.
    generator.model.to("cpu")
    generator.device = torch.device("cpu")
    expanded = [out["generated_text"].strip() for out in outputs]
    return "\n\n".join(expanded)

# Build the Gradio interface.
iface = gr.Interface(
    fn=expand_prompt,
    inputs=gr.Textbox(lines=2, placeholder="Enter your basic prompt here...", label="Basic Prompt"),
    outputs=gr.Textbox(lines=10, label="Expanded Prompts"),
    title="Prompt Expansion Generator",
    description=(
        "Enter a basic prompt and receive 5 creative, expanded prompt variants. "
        "The model (EleutherAI/gpt-j-6B) is preloaded on CPU at startup and moved to the GPU "
        "(via ZeroGPU) only when a prompt is submitted. Copy the output for use with your "
        "downstream image-generation pipeline."
    ),
)

if __name__ == "__main__":
    iface.launch()
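
# Quick local check (a sketch, not part of the Space): with a CUDA device
# available you can exercise the handler directly, e.g.
#
#     print(expand_prompt("a castle on a hill at dawn", num_variants=2, max_length=60))
#
# On Spaces, the @spaces.GPU decorator handles GPU allocation around the call.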