import gradio as gr
import spaces  # ZeroGPU helper module
import torch
from transformers import pipeline

# Preload the text-generation model on CPU at startup.
# Model: EleutherAI/gpt-j-6B (https://huggingface.co/EleutherAI/gpt-j-6B)
# Loading on CPU (device=-1) means initialization finishes before the GUI is up.
generator = pipeline("text-generation", model="EleutherAI/gpt-j-6B", device=-1)

@spaces.GPU  # ZeroGPU allocates a GPU only for the duration of this call.
def expand_prompt(prompt, num_variants=5, max_length=100):
    """
    Given a basic prompt, generate `num_variants` expanded prompt variants.

    The model is moved to the GPU for generation and back to the CPU
    afterwards, so the Space holds the GPU only while generating.
    """
    # Move the model to the GPU and point the pipeline at it. Updating
    # `generator.device` as well keeps input tensors on the same device
    # as the model; otherwise the pipeline would feed CPU tensors to a
    # CUDA model and raise a device-mismatch error.
    generator.model.to("cuda")
    generator.device = torch.device("cuda")
    outputs = generator(
        prompt,
        max_length=max_length,
        num_return_sequences=num_variants,
        do_sample=True,
    )
    # Move the model back to CPU so the GPU can be released.
    generator.model.to("cpu")
    generator.device = torch.device("cpu")
    expanded = [out["generated_text"].strip() for out in outputs]
    return "\n\n".join(expanded)

# Build the Gradio interface.
iface = gr.Interface(
    fn=expand_prompt,
    inputs=gr.Textbox(lines=2, placeholder="Enter your basic prompt here...", label="Basic Prompt"),
    outputs=gr.Textbox(lines=10, label="Expanded Prompts"),
    title="Prompt Expansion Generator",
    description=(
        "Enter a basic prompt and receive 5 creative, expanded prompt variants. "
        "The model (EleutherAI/gpt-j-6B) is preloaded on CPU at startup and moved to the GPU "
        "(via ZeroGPU) only when a prompt is submitted. Copy the output for use with your "
        "downstream image-generation pipeline."
    ),
)

if __name__ == "__main__":
    iface.launch()
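
# Quick local check (a sketch, not part of the Space): with a CUDA device
# available you can exercise the handler directly, e.g.
#
#     print(expand_prompt("a castle on a hill at dawn", num_variants=2, max_length=60))
#
# On Spaces, the @spaces.GPU decorator handles GPU allocation around the call.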