import os

import gradio as gr
import torch
from transformers import pipeline

# --- App Configuration ---
TITLE = "✍️ AI Story Outliner"
DESCRIPTION = """
Enter a prompt and get 10 unique story outlines from a CPU-friendly AI model.
The app uses **Tencent's Hunyuan-1.8B** to generate creative outlines formatted in Markdown.

**How it works:**
1. Enter your story idea.
2. The AI will generate 10 different story outlines.
3. Each outline has a dramatic beginning and is concise, like a song.
"""

# --- Example Prompts for Storytelling ---
examples = [
    ["The old lighthouse keeper stared into the storm. He'd seen many tempests, but this one was different. This one had eyes..."],
    ["In a city powered by dreams, a young inventor creates a machine that can record them. His first recording reveals a nightmare that doesn't belong to him."],
    ["The knight adjusted his helmet, the dragon's roar echoing in the valley. He was ready for the fight, but not for what the dragon said when it finally spoke."],
    ["She found the old leather-bound journal in her grandfather's attic. The first entry read: 'To relieve stress, I walk in the woods. But today, the woods walked with me.'"],
    ["The meditation app promised to help her 'delete unhelpful thoughts.' She tapped the button, and to her horror, the memory of her own name began to fade..."]
]

# --- Model Initialization ---
# This section loads a smaller, CPU-friendly model.
# It will automatically use the HF_TOKEN secret when deployed on Hugging Face Spaces.
generator = None
model_error = None
try:
    print("Initializing model... This may take a moment.")

    # Explicitly load the token from environment variables (for HF Spaces secrets).
    # This makes authentication more robust, overriding any bad default credentials.
    hf_token = os.environ.get("HF_TOKEN")

    # Check that the token was loaded correctly.
    if hf_token:
        print("✅ HF_TOKEN secret found.")
    else:
        print("⚠️ HF_TOKEN secret not found. Please ensure it is set in your Hugging Face Space settings.")
        # Raise an error to stop the app from proceeding without a token.
        raise ValueError("Hugging Face token not found. Please set the HF_TOKEN secret.")

    # Using a smaller model from the user's list.
    # Passing the token explicitly to ensure correct authentication.
    generator = pipeline(
        "text-generation",
        model="tencent/Hunyuan-1.8B-Instruct",
        torch_dtype=torch.bfloat16,  # bfloat16 for better performance where supported
        device_map="auto",           # Uses GPU if available, otherwise CPU
        token=hf_token,
    )
    print("✅ tencent/Hunyuan-1.8B-Instruct model loaded successfully!")

except Exception as e:
    model_error = e
    print("--- 🚨 Error loading model ---")
    print(f"Error: {model_error}")


# --- App Logic ---
def generate_stories(prompt: str) -> list[str]:
    """
    Generates 10 story outlines from the loaded model based on the user's prompt.
    """
    # If the model failed to load, display the error in all output boxes.
    if model_error:
        error_message = (
            "**Model failed to load.**\n\nPlease check the console logs for details."
            f"\n\n**Error:**\n`{model_error}`"
        )
        return [error_message] * 10

    if not prompt:
        # Return a list of 10 empty strings to clear the outputs.
        return [""] * 10

    # ChatML-style prompt format, used here for the Hunyuan chat model.
    system_instruction = (
        "You are an expert storyteller. Your task is to take a user's prompt and "
        "write a short story as a Markdown outline. The story must have a dramatic "
        "arc and be the length of a song. Use emojis to highlight the story sections."
    )
    story_prompt = (
        f"<|im_start|>system\n{system_instruction}<|im_end|>\n"
        f"<|im_start|>user\n{prompt}<|im_end|>\n"
        f"<|im_start|>assistant\n"
    )

    # Parameters for the pipeline to generate 10 diverse results.
    params = {
        "max_new_tokens": 250,
        "num_return_sequences": 10,
        "do_sample": True,
        "temperature": 0.8,
        "top_p": 0.95,
        "pad_token_id": generator.tokenizer.eos_token_id,  # Suppress padding warning
    }

    # Generate 10 different story variations.
    outputs = generator(story_prompt, **params)

    # Extract the generated text and clean it up.
    stories = []
    for out in outputs:
        # Keep only the assistant's reply: drop the prompt prefix and anything
        # after the end-of-turn marker.
        full_text = out["generated_text"]
        assistant_response = full_text.split("<|im_start|>assistant\n")[-1]
        assistant_response = assistant_response.split("<|im_end|>")[0].strip()
        stories.append(assistant_response)

    # Ensure we return exactly 10 stories, padding if necessary.
    while len(stories) < 10:
        stories.append("Failed to generate a story for this slot.")

    return stories


# --- Gradio Interface ---
with gr.Blocks(theme=gr.themes.Soft(), css=".gradio-container {max-width: 95% !important;}") as demo:
    gr.Markdown(f"# {TITLE}")
    gr.Markdown(DESCRIPTION)

    with gr.Row():
        prompt_input = gr.Textbox(
            label="Your Story Prompt",
            placeholder="Start your story here...",
            lines=3,
            scale=4,
        )
        generate_button = gr.Button("Generate Outlines ✨", variant="primary", scale=1)

    # Ten Markdown panels, two per row, one for each generated outline.
    story_outputs = []
    for _ in range(5):
        with gr.Row():
            for _ in range(2):
                with gr.Column():
                    story_outputs.append(gr.Markdown())

    gr.Examples(examples=examples, inputs=prompt_input)

    generate_button.click(fn=generate_stories, inputs=prompt_input, outputs=story_outputs)

if __name__ == "__main__":
    demo.launch()
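
# Optional smoke test (a sketch, not part of the deployed app): call
# generate_stories() directly to check model output without starting the web UI.
# The prompt below is an arbitrary example, and this assumes the model loaded
# successfully. Uncomment to run instead of demo.launch().
#
#   outlines = generate_stories("A clockmaker discovers her newest clock runs backwards.")
#   for i, outline in enumerate(outlines, start=1):
#       print(f"--- Outline {i} ---\n{outline}\n")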