import gradio as gr
import spaces  # Import ZeroGPU's helper module
from transformers import pipeline
import torch

# Global generator variable; load lazily on first request so startup stays fast.
generator = None


def get_generator():
    global generator
    if generator is None:
        try:
            # If a GPU is available, load on GPU (device=0).
            # float16 halves GPT-J-6B's ~24 GB fp32 footprint so it fits
            # comfortably in GPU memory.
            if torch.cuda.is_available():
                generator = pipeline(
                    "text-generation",
                    model="EleutherAI/gpt-j-6B",
                    torch_dtype=torch.float16,
                    device=0,
                )
            else:
                generator = pipeline("text-generation", model="EleutherAI/gpt-j-6B", device=-1)
        except Exception as e:
            print("Error loading model on GPU, falling back to CPU:", e)
            generator = pipeline("text-generation", model="EleutherAI/gpt-j-6B", device=-1)
    return generator


@spaces.GPU  # ZeroGPU allocates a GPU only while this function runs.
def expand_prompt(prompt, num_variants=5, max_length=100):
    """
    Given a basic prompt, generate `num_variants` expanded prompts using GPT-J-6B.
    The GPU is only engaged during this function call.
    """
    gen = get_generator()
    outputs = gen(
        prompt,
        max_length=max_length,  # total length (prompt + completion) in tokens
        num_return_sequences=num_variants,
        do_sample=True,
    )
    expanded = [out["generated_text"].strip() for out in outputs]
    return "\n\n".join(expanded)


iface = gr.Interface(
    fn=expand_prompt,
    inputs=gr.Textbox(lines=2, placeholder="Enter your basic prompt here...", label="Basic Prompt"),
    outputs=gr.Textbox(lines=10, label="Expanded Prompts"),
    title="Prompt Expansion Generator",
    description=(
        "Enter a basic prompt and receive 5 creative, expanded prompt variants. "
        "This tool leverages the EleutherAI/gpt-j-6B model on an A100 GPU via ZeroGPU. "
        "The GPU is only allocated while a prompt is being processed, in line with "
        "ZeroGPU's on-demand allocation model. Copy the output for use with your "
        "downstream image-generation pipeline."
    ),
)

if __name__ == "__main__":
    iface.launch()
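
# ---------------------------------------------------------------------------
# Deployment notes (a minimal sketch; the package list is an assumption,
# adjust it to your Space): save this file as app.py and place a
# requirements.txt alongside it, e.g.
#
#     gradio
#     spaces
#     transformers
#     torch
#
# For a local smoke test, note that GPT-J-6B needs roughly 24 GB of RAM on
# CPU or about 12 GB of GPU memory in float16:
#
#     $ python app.py
#     # then open the printed local URL and submit a short prompt
# ---------------------------------------------------------------------------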