Nikita committed · Commit c4c426e · unverified · 2 parents: ccccaf7 2999669

Merge pull request #2 from DeltaZN/feat/improve-image-generation

src/agent/image_agent.py ADDED
@@ -0,0 +1,82 @@
+ from pydantic import BaseModel, Field
+ from typing import Literal, Optional
+ from agent.llm import create_light_llm
+ from langchain_core.messages import SystemMessage, HumanMessage
+ import logging
+
+ logger = logging.getLogger(__name__)
+
+
+ IMAGE_GENERATION_SYSTEM_PROMPT = """You are an AI agent for a visual novel game. Your role is to process an incoming scene description and determine if the visual scene needs to change. If it does, you will generate a new `scene_description`. This `scene_description` MUST be a highly detailed image prompt, specifically engineered for an AI image generation model, and it MUST adhere to the strict first-person perspective detailed below.
+
+ **Your Core Tasks & Output Structure:**
+ Your output MUST be a `ChangeScene` object. You need to:
+ 1. **Determine Change Type:** Decide whether the scene requires "change_completely", "modify", or "no_change" and set this in the `change_scene` field of the output object.
+ 2. **Generate FPS Image Prompt:** If your decision is "change_completely" or "modify", you MUST then generate the image prompt and place it in the `scene_description` field of the output object. If "no_change", this field can be null or empty.
+
+ **Mandatory: First-Person Perspective (FPS) for Image Prompts**
+ The image prompt you generate for the `scene_description` field MUST strictly describe the scene from a first-person perspective (FPS), as if the player is looking directly through the character's eyes.
+ * **Viewpoint:** All descriptions must be from the character's eye level, looking forward or as indicated by the scene.
+ * **Character Visibility:** The scene must be depicted strictly as if looking through the character's eyes. NO part of the character's own body (e.g., hands, arms, feet, clothing on them) should be visible or described in the prompt. The view is purely what is external to the character.
+ * **Immersion:** Focus on what the character directly sees and perceives in their immediate environment. Use phrasing that reflects this, for example: "I see...", "Before me lies...", "Looking through the grimy window...", "The corridor stretches out in front of me."
+
+ **Guidelines for Crafting the FPS Image Prompt (for `scene_description` field):**
+ When generating the image prompt, ensure it is detailed and considers the following aspects, all from the character's first-person viewpoint:
+
+ 1. **Subject & Focus (as seen by the character):**
+    * What is the primary subject or point of interest directly in the character's view?
+    * Describe any other characters visible to the POV character: their appearance (from the character's perspective), clothing, expressions, posture, and actions.
+    * Detail key objects, items, or environmental elements the character is interacting with or observing.
+
+ 2. **Setting & Environment (from the character's perspective):**
+    * Describe the immediate surroundings as the character would see them.
+    * Time of day and weather conditions as perceived by the character.
+    * Specific architectural or natural features visible in the character's field of view.
+
+ 3. **Art Style & Medium:**
+    * Specify the desired visual style (e.g., photorealistic, anime, manga, watercolor, oil painting, pixel art, 3D render, concept art, comic book).
+    * Mention any specific artist influences if relevant (e.g., "in the style of Studio Ghibli").
+
+ 4. **Composition & Framing (from the character's viewpoint):**
+    * How is the scene framed from the character's eyes? (e.g., "looking straight ahead at a door," "view through a sniper scope," "gazing up at a tall tower").
+    * Describe the arrangement of elements as perceived by the character. Avoid terms like "medium shot" or "wide shot" unless they can be rephrased from an FPS view (e.g., "a wide vista opens up before me").
+
+ 5. **Lighting & Atmosphere (as perceived by the character):**
+    * Describe lighting conditions (e.g., "bright sunlight streams through the window in front of me," "only the dim glow of my flashlight illuminates the passage ahead," "neon signs reflect off the wet street I'm looking at").
+    * What is the overall mood or atmosphere from the character's perspective? (e.g., "a tense silence hangs in the air as I look down the dark hallway," "a sense of peace as I gaze at the sunset over the mountains").
+
+ 6. **Color Palette:**
+    * Specify dominant colors or a color scheme relevant to what the character sees.
+
+ 7. **Details & Keywords:**
+    * Include crucial details from the input scene description that the character would notice.
+    * Use descriptive adjectives and strong keywords.
+
+ **Example for the `scene_description` field (the FPS image prompt):**
+ "FPS view. Through the cockpit window of a futuristic hovercar, a sprawling neon-lit cyberpunk city stretches out under a stormy, rain-lashed sky. Rain streaks across the glass. The hum of the engine is palpable. Photorealistic, Blade Runner style. Cool blue and vibrant pink neon palette."
+ """
+
+
+ class ChangeScene(BaseModel):
+     change_scene: Literal["change_completely", "modify", "no_change"] = Field(
+         description="Whether the scene should be completely changed, just modified, or not changed at all"
+     )
+     scene_description: Optional[str] = None
+
+
+ image_prompt_generator_llm = create_light_llm(0.1).with_structured_output(ChangeScene)
+
+
+ async def generate_image_prompt(scene_description: str, request_id: str) -> ChangeScene:
+     """
+     Decides whether the scene should change and, if so, generates a detailed
+     image prompt for an AI image generation model.
+     """
+     logger.info(f"Generating image prompt for the current scene: {request_id}")
+     response = await image_prompt_generator_llm.ainvoke(
+         [
+             SystemMessage(content=IMAGE_GENERATION_SYSTEM_PROMPT),
+             HumanMessage(content=scene_description),
+         ]
+     )
+     logger.info(f"Image prompt generated: {request_id}")
+     return response
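
A minimal sketch of how this new agent is driven; the scene text and request id are made-up examples, while the names match the module above:

    # Hypothetical driver for generate_image_prompt (illustrative inputs only).
    import asyncio
    from agent.image_agent import generate_image_prompt

    async def demo():
        change = await generate_image_prompt(
            "The player opens the cellar door and descends into darkness.",
            request_id="demo-1",
        )
        # change.change_scene is "change_completely" | "modify" | "no_change";
        # change.scene_description carries the FPS image prompt when a change is needed.
        if change.change_scene != "no_change":
            print(change.scene_description)

    asyncio.run(demo())
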
src/agent/llm.py CHANGED
@@ -43,6 +43,15 @@ def create_llm(
      top_p=top_p,
      thinking_budget=1024,
  )
+
+
+ def create_light_llm(temperature: float = settings.temperature, top_p: float = settings.top_p):
+     return ChatGoogleGenerativeAI(
+         model="gemini-2.0-flash",
+         google_api_key=_get_api_key(),
+         temperature=temperature,
+         top_p=top_p,
+     )
 
 
  def create_precise_llm() -> ChatGoogleGenerativeAI:
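
A sketch of how the new light model is meant to be consumed; the pattern is taken from the agents in this PR, and the `Tone` schema is a stand-in:

    # create_light_llm targets the cheaper gemini-2.0-flash model (no thinking
    # budget); the sub-agents bind it to a Pydantic schema at temperature 0.1
    # to keep structured output nearly deterministic.
    from pydantic import BaseModel
    from agent.llm import create_light_llm

    class Tone(BaseModel):  # stand-in schema for illustration
        prompt: str

    tone_llm = create_light_llm(0.1).with_structured_output(Tone)
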
src/agent/llm_agent.py CHANGED
@@ -1,61 +1,73 @@
- """Simple interface for querying the LLM directly."""
-
- import logging
- from typing import List, Optional
-
- from pydantic import BaseModel, Field
-
  from agent.llm import create_llm
+ from pydantic import BaseModel, Field
+ from typing import List
+ import logging
+ from agent.image_agent import ChangeScene
+ import asyncio
+ from agent.music_agent import generate_music_prompt
+ from agent.image_agent import generate_image_prompt
+ import uuid
 
  logger = logging.getLogger(__name__)
 
 
- class ChangeScene(BaseModel):
-     """Information about a scene change."""
-
-     change_scene: bool = Field(description="Whether the scene should change")
-     scene_description: Optional[str] = None
-
-
- class ChangeMusic(BaseModel):
-     """Information about a music change."""
-
-     change_music: bool = Field(description="Whether the music should change")
-     music_description: Optional[str] = None
-
-
  class PlayerOption(BaseModel):
-     """Single option for the player."""
-
      option_description: str = Field(
-         description=(
-             "Description of the option, e.g. '[Say] Hello!' "
-             "or 'Go to the forest'"
-         )
+         description="The description of the option, Examples: [Change location] Go to the forest; [Say] Hello!"
      )
 
 
  class LLMOutput(BaseModel):
-     """Expected structure returned by the LLM."""
-
-     change_scene: ChangeScene
-     change_music: ChangeMusic
      game_message: str = Field(
-         description=(
-             "Message shown to the player, e.g. 'You entered the forest...'"
-         )
+         description="The message to the player, Example: You entered the forest, and you see unknown scary creatures. What do you do?"
      )
      player_options: List[PlayerOption] = Field(
-         description="Up to three options for the player"
+         description="The list of up to 3 options for the player to choose from."
      )
 
 
- _llm = create_llm().with_structured_output(LLMOutput)
+ class MultiAgentResponse(BaseModel):
+     game_message: str = Field(
+         description="The message to the player, Example: You entered the forest, and you see unknown scary creatures. What do you do?"
+     )
+     player_options: List[PlayerOption] = Field(
+         description="The list of up to 3 options for the player to choose from."
+     )
+     music_prompt: str = Field(description="The prompt for the music generation model.")
+     change_scene: ChangeScene = Field(description="The change to the scene.")
+
+ llm = create_llm().with_structured_output(MultiAgentResponse)
 
 
- async def process_user_input(text: str) -> LLMOutput:
-     """Send user text to the LLM and return the parsed response."""
-     logger.info("User choice: %s", text)
-     response: LLMOutput = await _llm.ainvoke(text)
-     logger.info("LLM response: %s", response)
-     return response
+ async def process_user_input(input: str) -> MultiAgentResponse:
+     """
+     Process user input and update the state.
+     """
+     request_id = str(uuid.uuid4())
+     logger.info(f"LLM input received: {request_id}")
+
+     response: MultiAgentResponse = await llm.ainvoke(input)
+
+     current_state = f"""{input}
+
+ Game reaction: {response.game_message}
+ Player options: {response.player_options}
+ """
+
+     music_prompt_task = generate_music_prompt(current_state, request_id)
+     change_scene_task = generate_image_prompt(current_state, request_id)
+     music_prompt, change_scene = await asyncio.gather(music_prompt_task, change_scene_task)
+
+     multi_agent_response = MultiAgentResponse(
+         game_message=response.game_message,
+         player_options=response.player_options,
+         music_prompt=music_prompt,
+         change_scene=change_scene,
+     )
+
+     logger.info(f"LLM responded: {request_id}")
+
+     return multi_agent_response
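
Once the story model has answered, the music and image sub-agents run concurrently on the combined state via `asyncio.gather`. A minimal sketch of calling the new entry point (the input string is illustrative):

    # Hypothetical call site for process_user_input.
    import asyncio
    from agent.llm_agent import process_user_input

    async def demo():
        result = await process_user_input("[Say] Hello!")
        print(result.game_message)               # story text for the player
        print(result.music_prompt)               # fed to the music generator
        print(result.change_scene.change_scene)  # scene-change decision

    asyncio.run(demo())
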
src/agent/llm_graph.py CHANGED
@@ -3,8 +3,9 @@
  import logging
  from dataclasses import dataclass
  from typing import Any, Dict, Optional
-
+ import asyncio
  from langgraph.graph import END, StateGraph
+ from agent.image_agent import generate_image_prompt
 
  from agent.tools import (
      check_ending,
@@ -14,7 +15,7 @@ from agent.tools import (
      update_state_with_choice,
  )
  from agent.state import get_user_state
-
+ from audio.audio_generator import change_music_tone
  logger = logging.getLogger(__name__)
 
 
@@ -59,11 +60,13 @@ async def node_init_game(state: GraphState) -> GraphState:
      first_scene = await generate_scene.ainvoke(
          {"user_hash": state.user_hash, "last_choice": "start"}
      )
+     change_scene = await generate_image_prompt(first_scene["description"], state.user_hash)
+     logger.info(f"Change scene: {change_scene}")
      await generate_scene_image.ainvoke(
          {
              "user_hash": state.user_hash,
              "scene_id": first_scene["scene_id"],
-             "prompt": first_scene["description"],
+             "change_scene": change_scene,
          }
      )
      state.scene = first_scene
@@ -91,13 +94,17 @@ async def node_player_step(state: GraphState) -> GraphState:
              "last_choice": state.choice_text,
          }
      )
-     await generate_scene_image.ainvoke(
+     change_scene = await generate_image_prompt(next_scene["description"], state.user_hash)
+     image_task = generate_scene_image.ainvoke(
          {
              "user_hash": state.user_hash,
              "scene_id": next_scene["scene_id"],
-             "prompt": next_scene["description"],
+             "current_image": user_state.assets[scene_id],
+             "change_scene": change_scene,
          }
      )
+     music_task = change_music_tone(state.user_hash, next_scene["music"])
+     await asyncio.gather(image_task, music_task)
      state.scene = next_scene
      return state
 
src/agent/music_agent.py ADDED
@@ -0,0 +1,47 @@
+ from pydantic import BaseModel
+ from agent.llm import create_light_llm
+ from langchain_core.messages import SystemMessage, HumanMessage
+ import logging
+
+ logger = logging.getLogger(__name__)
+
+ music_options = """Instruments: 303 Acid Bass, 808 Hip Hop Beat, Accordion, Alto Saxophone, Bagpipes, Balalaika Ensemble, Banjo, Bass Clarinet, Bongos, Boomy Bass, Bouzouki, Buchla Synths, Cello, Charango, Clavichord, Conga Drums, Didgeridoo, Dirty Synths, Djembe, Drumline, Dulcimer, Fiddle, Flamenco Guitar, Funk Drums, Glockenspiel, Guitar, Hang Drum, Harmonica, Harp, Harpsichord, Hurdy-gurdy, Kalimba, Koto, Lyre, Mandolin, Maracas, Marimba, Mbira, Mellotron, Metallic Twang, Moog Oscillations, Ocarina, Persian Tar, Pipa, Precision Bass, Ragtime Piano, Rhodes Piano, Shamisen, Shredding Guitar, Sitar, Slide Guitar, Smooth Pianos, Spacey Synths, Steel Drum, Synth Pads, Tabla, TR-909 Drum Machine, Trumpet, Tuba, Vibraphone, Viola Ensemble, Warm Acoustic Guitar, Woodwinds, ...
+ Music Genre: Acid Jazz, Afrobeat, Alternative Country, Baroque, Bengal Baul, Bhangra, Bluegrass, Blues Rock, Bossa Nova, Breakbeat, Celtic Folk, Chillout, Chiptune, Classic Rock, Contemporary R&B, Cumbia, Deep House, Disco Funk, Drum & Bass, Dubstep, EDM, Electro Swing, Funk Metal, G-funk, Garage Rock, Glitch Hop, Grime, Hyperpop, Indian Classical, Indie Electronic, Indie Folk, Indie Pop, Irish Folk, Jam Band, Jamaican Dub, Jazz Fusion, Latin Jazz, Lo-Fi Hip Hop, Marching Band, Merengue, New Jack Swing, Minimal Techno, Moombahton, Neo-Soul, Orchestral Score, Piano Ballad, Polka, Post-Punk, 60s Psychedelic Rock, Psytrance, R&B, Reggae, Reggaeton, Renaissance Music, Salsa, Shoegaze, Ska, Surf Rock, Synthpop, Techno, Trance, Trap Beat, Trip Hop, Vaporwave, Witch house, ...
+ Mood/Description: Acoustic Instruments, Ambient, Bright Tones, Chill, Crunchy Distortion, Danceable, Dreamy, Echo, Emotional, Ethereal Ambience, Experimental, Fat Beats, Funky, Glitchy Effects, Huge Drop, Live Performance, Lo-fi, Ominous Drone, Psychedelic, Rich Orchestration, Saturated Tones, Subdued Melody, Sustained Chords, Swirling Phasers, Tight Groove, Unsettling, Upbeat, Virtuoso, Weird Noises, ...
+ """
+ system_prompt = f"""
+ You are a music agent responsible for generating appropriate music tones for scenes in a visual novel game.
+
+ Your task is to analyze the current scene description and generate a detailed music prompt that captures:
+ 1. The emotional atmosphere
+ 2. The intensity level
+ 3. The genre/style that best fits the scene
+ 4. Specific instruments that would enhance the mood
+
+ You have access to a wide range of musical elements including:
+ {music_options}
+
+ When generating a music prompt:
+ - Consider the scene's context, mood, and any suspense elements
+ - Choose instruments that complement the scene's atmosphere
+ - Select a genre that matches the story's setting and tone
+ - Include specific mood descriptors to guide the music generation
+
+ Your output should be a concise but detailed prompt that the music generation model can use to create an appropriate soundtrack for the scene.
+ """
+
+
+ class MusicPrompt(BaseModel):
+     prompt: str
+
+
+ llm = create_light_llm(0.1).with_structured_output(MusicPrompt)
+
+
+ async def generate_music_prompt(scene_description: str, request_id: str) -> str:
+     logger.info(f"Generating music prompt for the current scene: {request_id}")
+     response = await llm.ainvoke(
+         [SystemMessage(content=system_prompt), HumanMessage(content=scene_description)]
+     )
+     logger.info(f"Music prompt generated: {request_id}")
+     return response.prompt
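
A hypothetical invocation of the music agent; the scene text is invented, and the expected output shape follows the system prompt above:

    import asyncio
    from agent.music_agent import generate_music_prompt

    async def demo():
        prompt = await generate_music_prompt(
            "A tense standoff in a rain-soaked alley at midnight.", "demo-2"
        )
        # Expected shape per the system prompt: mood + genre + instruments,
        # e.g. "Ominous Drone, Trip Hop, Dirty Synths, Subdued Melody".
        print(prompt)

    asyncio.run(demo())
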
src/agent/tools.py CHANGED
@@ -18,7 +18,8 @@ from agent.models import (
  )
  from agent.prompts import ENDING_CHECK_PROMPT, SCENE_PROMPT, STORY_FRAME_PROMPT
  from agent.state import get_user_state, set_user_state
- from images.image_generator import generate_image
+ from images.image_generator import modify_image, generate_image
+ from agent.image_agent import ChangeScene
 
  logger = logging.getLogger(__name__)
 
@@ -71,11 +72,9 @@ async def generate_scene(
      prompt = SCENE_PROMPT.format(
          lore=state.story_frame.lore,
          goal=state.story_frame.goal,
-         milestones=','.join(m.id for m in state.story_frame.milestones),
-         endings=','.join(e.id for e in state.story_frame.endings),
-         history='; '.join(
-             f"{c.scene_id}:{c.choice_text}" for c in state.user_choices
-         ),
+         milestones=",".join(m.id for m in state.story_frame.milestones),
+         endings=",".join(e.id for e in state.story_frame.endings),
+         history="; ".join(f"{c.scene_id}:{c.choice_text}" for c in state.user_choices),
          last_choice=last_choice,
      )
      resp: SceneLLM = await llm.ainvoke(prompt)
@@ -107,11 +106,19 @@ async def generate_scene(
  async def generate_scene_image(
      user_hash: Annotated[str, "User session ID"],
      scene_id: Annotated[str, "Scene ID"],
-     prompt: Annotated[str, "Prompt for image generation"],
+     change_scene: Annotated[ChangeScene, "Prompt for image generation"],
+     current_image: Annotated[str, "Current image"] | None = None,
  ) -> Annotated[str, "Path to generated image"]:
      """Generate an image for a scene and save the path in the state."""
      try:
-         image_path, _ = await generate_image(prompt)
+         image_path = current_image
+         if change_scene.change_scene == "change_completely" or change_scene.change_scene == "modify":
+             image_path, _ = await (
+                 generate_image(change_scene.scene_description)
+                 if current_image is None
+                 # for now, always modify the existing image to avoid generating an update in a completely wrong style
+                 else modify_image(current_image, change_scene.scene_description)
+             )
          state = get_user_state(user_hash)
          if scene_id in state.scenes:
              state.scenes[scene_id].image = image_path
@@ -152,14 +159,10 @@ async def check_ending(
      if not state.story_frame:
          return _err("No story frame")
      llm = create_llm().with_structured_output(EndingCheckResult)
-     history = '; '.join(
-         f"{c.scene_id}:{c.choice_text}" for c in state.user_choices
-     )
+     history = "; ".join(f"{c.scene_id}:{c.choice_text}" for c in state.user_choices)
      prompt = ENDING_CHECK_PROMPT.format(
          history=history,
-         endings=','.join(
-             f"{e.id}:{e.condition}" for e in state.story_frame.endings
-         ),
+         endings=",".join(f"{e.id}:{e.condition}" for e in state.story_frame.endings),
      )
      resp: EndingCheckResult = await llm.ainvoke(prompt)
      if resp.ending_reached and resp.ending:
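
Condensed, the new dispatch inside `generate_scene_image` works as sketched below (signatures as in `images/image_generator.py`; the helper name is hypothetical):

    from images.image_generator import generate_image, modify_image

    # no_change                                  -> keep current_image untouched
    # modify/change_completely, no current image -> generate_image(prompt)
    # modify/change_completely, image exists     -> modify_image(image, prompt)
    async def pick_image(change_scene, current_image):
        if change_scene.change_scene == "no_change":
            return current_image
        if current_image is None:
            path, _ = await generate_image(change_scene.scene_description)
        else:
            # "change_completely" is also routed through modify_image for now,
            # to keep the art style consistent across scenes.
            path, _ = await modify_image(current_image, change_scene.scene_description)
        return path
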
src/audio/audio_generator.py CHANGED
@@ -13,10 +13,12 @@ logger = logging.getLogger(__name__)
  client = genai.Client(api_key=settings.gemini_api_key.get_secret_value(), http_options={'api_version': 'v1alpha'})
 
  async def generate_music(user_hash: str, music_tone: str, receive_audio):
+     if user_hash in sessions:
+         return
      async with (
          client.aio.live.music.connect(model='models/lyria-realtime-exp') as session,
          asyncio.TaskGroup() as tg,
      ):
          # Set up task to receive server messages.
          tg.create_task(receive_audio(session, user_hash))
 
@@ -31,10 +33,9 @@ async def generate_music(user_hash: str, music_tone: str, receive_audio):
          )
          await session.play()
          logger.info(f"Started music generation for user hash {user_hash}, music tone: {music_tone}")
-         await cleanup_music_session(user_hash)
          sessions[user_hash] = {
              'session': session,
-             'queue': queue.Queue(maxsize=3)
+             'queue': queue.Queue()
          }
 
  async def change_music_tone(user_hash: str, new_tone):
@@ -43,7 +44,6 @@ async def change_music_tone(user_hash: str, new_tone):
      if not session:
          logger.error(f"No session found for user hash {user_hash}")
          return
-     await session.reset_context()
      await session.set_weighted_prompts(
          prompts=[types.WeightedPrompt(text=new_tone, weight=1.0)]
      )
src/config.py CHANGED
@@ -24,8 +24,11 @@ class BaseAppSettings(BaseSettings):
 
  class AppSettings(BaseAppSettings):
      gemini_api_key: SecretStr
+     gemini_api_keys: SecretStr
+     # assistant_api_key: SecretStr
      top_p: float = 0.95
      temperature: float = 0.5
+     pregenerate_next_scene: bool = True
 
 
  settings = AppSettings()
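
Since `AppSettings` extends pydantic's `BaseSettings`, the new fields are populated from the environment. A sketch of how they surface at runtime (the purpose of `pregenerate_next_scene` is inferred from its name; its consumer is not in this diff):

    # Values come from env vars / .env, e.g. GEMINI_API_KEYS, PREGENERATE_NEXT_SCENE.
    from config import settings

    keys = settings.gemini_api_keys.get_secret_value()  # SecretStr, like gemini_api_key
    if settings.pregenerate_next_scene:
        pass  # presumably gates speculative generation of the next scene
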
src/css.py CHANGED
@@ -33,11 +33,11 @@ custom_css = """
      background: rgba(0,0,0,0.7) !important;
      border: none !important;
      color: white !important;
-     font-size: 18px !important;
+     font-size: 15px !important;
      line-height: 1.5 !important;
-     padding: 20px !important;
+     padding: 10px !important;
      border-radius: 10px !important;
-     margin-bottom: 20px !important;
+     margin-bottom: 10px !important;
  }
 
  img {
@@ -49,7 +49,7 @@ img {
      border: none !important;
      color: white !important;
      -webkit-text-fill-color: white !important;
-     font-size: 18px !important;
+     font-size: 15px !important;
      resize: none !important;
  }
 
@@ -57,13 +57,12 @@ img {
  .choice-buttons {
      background: rgba(0,0,0,0.7) !important;
      border-radius: 10px !important;
-     padding: 15px !important;
+     padding: 10px !important;
  }
 
  .choice-buttons label {
      color: white !important;
-     font-size: 16px !important;
-     margin-bottom: 10px !important;
+     font-size: 14px !important;
  }
 
  /* Fix radio button backgrounds */
src/game_constructor.py CHANGED
@@ -1,10 +1,16 @@
  import gradio as gr
  import json
  import uuid
+ from game_setting import Character, GameSetting, get_user_story
+ from game_state import story, state, get_current_scene
+ from agent.llm_agent import process_user_input
+ from images.image_generator import generate_image
  from game_setting import Character, GameSetting
  from agent.runner import process_step
  from audio.audio_generator import start_music_generation
  import asyncio
+ from config import settings
+
 
  # Predefined suggestions for demo
  SETTING_SUGGESTIONS = [
@@ -105,6 +111,7 @@ def save_game_config(
      except Exception as e:
          return f"❌ Error saving configuration: {str(e)}"
 
+
  async def start_game_with_settings(
      user_hash: str,
      setting_desc: str,
@@ -139,6 +146,8 @@ async def start_game_with_settings(
      )
 
      game_setting = GameSetting(character=character, setting=setting_desc, genre=genre)
+
+     asyncio.create_task(start_music_generation(user_hash, "neutral"))
 
      # Start the LLM graph to initialize the story
      result = await process_step(
@@ -149,8 +158,6 @@ async def start_game_with_settings(
          genre=game_setting.genre,
      )
 
-     asyncio.create_task(start_music_generation(user_hash, "neutral"))
-
      scene = result["scene"]
      scene_text = scene["description"]
      scene_image = scene.get("image", "")
src/game_setting.py CHANGED
@@ -1,12 +1,25 @@
  from pydantic import BaseModel
 
+
  class Character(BaseModel):
      name: str
      age: str
      background: str
      personality: str
 
+
  class GameSetting(BaseModel):
      character: Character
      setting: str
      genre: str
+
+
+ def get_user_story(
+     scene_description: str, scene_image_description: str, user_choice: str
+ ) -> str:
+     return f"""Current scene description:
+ {scene_description}
+ Current scene image description: {scene_image_description}
+
+ User's choice: {user_choice}
+ """
src/game_state.py CHANGED
@@ -1,10 +1,10 @@
-
  story = {
      "start": {
          "text": "You wake up in a mysterious forest. What do you do?",
          "image": "forest.jpg",
-         "choices": ["Explore", "Wait"],
+         "choices": {"Explore": None, "Wait": None},
          "music_tone": "neutral",
+         "img_description": "forest in the fog",
      },
  }
 
@@ -12,4 +12,4 @@ state = {"scene": "start"}
 
  def get_current_scene():
      scene = story[state["scene"]]
-     return scene["text"], scene["image"], scene["choices"]
+     return scene["text"], scene["image"], scene["choices"].keys()
src/images/image_generator.py CHANGED
@@ -6,25 +6,47 @@ from io import BytesIO
  from datetime import datetime
  from config import settings
  import logging
+ import asyncio
+ import gradio as gr
 
  logger = logging.getLogger(__name__)
 
  client = genai.Client(api_key=settings.gemini_api_key.get_secret_value()).aio
 
+ safety_settings = [
+     types.SafetySetting(
+         category="HARM_CATEGORY_HARASSMENT",
+         threshold="BLOCK_NONE",  # Block none
+     ),
+     types.SafetySetting(
+         category="HARM_CATEGORY_HATE_SPEECH",
+         threshold="BLOCK_NONE",  # Block none
+     ),
+     types.SafetySetting(
+         category="HARM_CATEGORY_SEXUALLY_EXPLICIT",
+         threshold="BLOCK_NONE",  # Block none
+     ),
+     types.SafetySetting(
+         category="HARM_CATEGORY_DANGEROUS_CONTENT",
+         threshold="BLOCK_NONE",  # Block none
+     ),
+ ]
+
+
  async def generate_image(prompt: str) -> tuple[str, str] | None:
      """
      Generate an image using Google's Gemini model and save it to generated/images directory.
 
      Args:
          prompt (str): The text prompt to generate the image from
 
      Returns:
          str: Path to the generated image file, or None if generation failed
      """
      # Ensure the generated/images directory exists
      output_dir = "generated/images"
      os.makedirs(output_dir, exist_ok=True)
 
      logger.info(f"Generating image with prompt: {prompt}")
 
      try:
@@ -32,8 +54,9 @@ async def generate_image(prompt: str) -> tuple[str, str] | None:
              model="gemini-2.0-flash-preview-image-generation",
              contents=prompt,
              config=types.GenerateContentConfig(
-                 response_modalities=['TEXT', 'IMAGE'],
-             )
+                 response_modalities=["TEXT", "IMAGE"],
+                 safety_settings=safety_settings,
+             ),
          )
 
          # Process the response parts
@@ -44,19 +67,20 @@ async def generate_image(prompt: str) -> tuple[str, str] | None:
                  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                  filename = f"gemini_{timestamp}.png"
                  filepath = os.path.join(output_dir, filename)
 
                  # Save the image
                  image = Image.open(BytesIO(part.inline_data.data))
-                 image.save(filepath, "PNG")
+                 await asyncio.to_thread(image.save, filepath, "PNG")
                  logger.info(f"Image saved to: {filepath}")
                  image_saved = True
 
-                 return filepath, part.text
+                 return filepath, prompt
 
          if not image_saved:
+             gr.Warning("Image was censored by Google!")
              logger.error("No image was generated in the response.")
              return None, None
 
      except Exception as e:
          logger.error(f"Error generating image: {e}")
          return None, None
@@ -65,38 +89,41 @@ async def generate_image(prompt: str) -> tuple[str, str] | None:
  async def modify_image(image_path: str, modification_prompt: str) -> str | None:
      """
      Modify an existing image using Google's Gemini model based on a text prompt.
 
      Args:
          image_path (str): Path to the existing image file
          modification_prompt (str): The text prompt describing how to modify the image
 
      Returns:
          str: Path to the modified image file, or None if modification failed
      """
      # Ensure the generated/images directory exists
      output_dir = "generated/images"
      os.makedirs(output_dir, exist_ok=True)
 
+     logger.info(f"Modifying current scene image with prompt: {modification_prompt}")
+
      # Check if the input image exists
      if not os.path.exists(image_path):
          logger.error(f"Error: Image file not found at {image_path}")
          return None
 
      key = settings.gemini_api_key.get_secret_value()
 
      client = genai.Client(api_key=key).aio
 
      try:
          # Load the input image
          input_image = Image.open(image_path)
 
          # Make the API call with both text and image
          response = await client.models.generate_content(
              model="gemini-2.0-flash-preview-image-generation",
              contents=[modification_prompt, input_image],
              config=types.GenerateContentConfig(
-                 response_modalities=['TEXT', 'IMAGE']
-             )
+                 response_modalities=["TEXT", "IMAGE"],
+                 safety_settings=safety_settings,
+             ),
          )
 
          # Process the response parts
@@ -107,19 +134,20 @@ async def modify_image(image_path: str, modification_prompt: str) -> str | None:
                  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                  filename = f"gemini_modified_{timestamp}.png"
                  filepath = os.path.join(output_dir, filename)
 
                  # Save the modified image
                  modified_image = Image.open(BytesIO(part.inline_data.data))
-                 modified_image.save(filepath, "PNG")
+                 await asyncio.to_thread(modified_image.save, filepath, "PNG")
                  logger.info(f"Modified image saved to: {filepath}")
                  image_saved = True
 
-                 return filepath, part.text
+                 return filepath, modification_prompt
 
          if not image_saved:
+             gr.Warning("Updated image was censored by Google!")
              logger.error("No modified image was generated in the response.")
              return None, None
 
      except Exception as e:
          logger.error(f"Error modifying image: {e}")
          return None, None
@@ -129,10 +157,10 @@ if __name__ == "__main__":
      # Example usage
      sample_prompt = "A Luke Skywalker half height sprite with white background for visual novel game"
      generated_image_path = generate_image(sample_prompt)
 
      # if generated_image_path:
      #     # Example modification
      #     modification_prompt = "Now the house is destroyed, and the jawas are running away"
      #     modified_image_path = modify_image(generated_image_path, modification_prompt)
      #     if modified_image_path:
      #         print(f"Successfully modified image: {modified_image_path}")
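
A hypothetical smoke test for the updated pair of functions; prompts are invented, and both functions now return `(path, prompt)` and save PNGs off the event loop via `asyncio.to_thread`:

    import asyncio
    from images.image_generator import generate_image, modify_image

    async def demo():
        path, _ = await generate_image("Foggy forest clearing at dawn, FPS view")
        if path is None:
            return  # generation failed or was blocked despite the BLOCK_NONE settings
        new_path, _ = await modify_image(path, "The fog lifts; warm morning light")
        print(path, new_path)

    asyncio.run(demo())
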
src/main.py CHANGED
@@ -5,6 +5,8 @@ from audio.audio_generator import (
      cleanup_music_session,
  )
  import logging
+ from agent.llm_agent import process_user_input
+ from images.image_generator import modify_image
  from agent.runner import process_step
  import uuid
  from game_constructor import (
@@ -15,6 +17,9 @@ from game_constructor import (
      load_character_suggestion,
      start_game_with_settings,
  )
+ import asyncio
+ from game_setting import get_user_story
+ from config import settings
 
  logger = logging.getLogger(__name__)
 
@@ -125,7 +130,7 @@ with gr.Blocks(
      # Fullscreen Loading Indicator (hidden by default)
      with gr.Column(visible=False, elem_id="loading-indicator") as loading_indicator:
          gr.HTML("<div class='loading-text'>🚀 Starting your adventure...</div>")
 
      local_storage = gr.BrowserState(str(uuid.uuid4()), "user_hash")
 
      # Constructor Interface (visible by default)