Rename chat_processing.py to core.py
Browse files- chat_processing.py +0 -213
- core.py +122 -0
chat_processing.py
DELETED
@@ -1,213 +0,0 @@
|
|
1 |
-
import re
|
2 |
-
from typing import Dict, List, Optional, Tuple
|
3 |
-
import base64
|
4 |
-
import numpy as np
|
5 |
-
from PIL import Image
|
6 |
-
import gradio as gr
|
7 |
-
|
8 |
-
from config import GRADIO_SUPPORTED_LANGUAGES, SEARCH_START, DIVIDER, REPLACE_END
|
9 |
-
|
10 |
-
# A conversation as (user_message, assistant_message) pairs; the user part
# may also be a list of multimodal content parts rather than a plain string.
History = List[Tuple[str, str]]
# OpenAI-style chat messages: each dict carries 'role' and 'content' keys.
Messages = List[Dict[str, str]]
|
12 |
-
|
13 |
-
def get_gradio_language(language):
    """Map a language name onto one Gradio's code editor supports, else None."""
    if language in GRADIO_SUPPORTED_LANGUAGES:
        return language
    return None
|
15 |
-
|
16 |
-
def history_to_messages(history: History, system: str) -> Messages:
    """Convert (user, assistant) history pairs into an OpenAI-style message list.

    Multimodal user turns (lists of content parts) are flattened to their
    concatenated text; when no text parts exist, the raw list is stringified.
    """
    messages = [{'role': 'system', 'content': system}]
    for user_turn, assistant_turn in history:
        if isinstance(user_turn, list):
            joined = "".join(
                part.get("text", "")
                for part in user_turn
                if isinstance(part, dict) and part.get("type") == "text"
            )
            user_turn = joined or str(user_turn)
        messages.append({'role': 'user', 'content': user_turn})
        messages.append({'role': 'assistant', 'content': assistant_turn})
    return messages
|
32 |
-
|
33 |
-
def messages_to_history(messages: Messages) -> History:
    """Rebuild (user, assistant) pairs from an OpenAI-style message list.

    The first entry must be the system message; the remainder is assumed to
    alternate user/assistant. Multimodal user content is flattened to text,
    falling back to str() of the raw list when no text parts are present.
    """
    assert messages[0]['role'] == 'system'
    pairs = []
    for user_msg, assistant_msg in zip(messages[1::2], messages[2::2]):
        content = user_msg['content']
        if isinstance(content, list):
            flattened = "".join(
                part.get("text", "")
                for part in content
                if isinstance(part, dict) and part.get("type") == "text"
            )
            content = flattened or str(content)
        pairs.append((content, assistant_msg['content']))
    return pairs
|
48 |
-
|
49 |
-
def history_to_chatbot_messages(history: History) -> List[Dict[str, str]]:
    """Flatten history tuples into the role/content list Gradio's Chatbot expects."""
    chat = []
    for user_msg, assistant_msg in history:
        # Multimodal user turns are reduced to their concatenated text parts.
        if isinstance(user_msg, list):
            flattened = "".join(
                part.get("text", "")
                for part in user_msg
                if isinstance(part, dict) and part.get("type") == "text"
            )
            user_msg = flattened or str(user_msg)
        chat.append({"role": "user", "content": user_msg})
        chat.append({"role": "assistant", "content": assistant_msg})
    return chat
|
64 |
-
|
65 |
-
def remove_code_block(text):
    """Extract the code payload from a fenced markdown block in *text*.

    Tries ```html/```HTML fences first, then anonymous fences with newlines,
    then fences without line breaks. When no fence matches, the stripped text
    is returned unchanged (models sometimes emit raw HTML without fences).
    """
    patterns = [
        r'```(?:html|HTML)\n([\s\S]+?)\n```',  # ```html ... ``` (either case)
        r'```\n([\s\S]+?)\n```',               # anonymous fence with newlines
        r'```([\s\S]+?)```',                   # fence without line breaks
    ]
    for pattern in patterns:
        match = re.search(pattern, text, re.DOTALL)
        if match:
            return match.group(1).strip()
    # No fence found: return the stripped text as-is.
    # Fix: the original special-cased text starting with '<!DOCTYPE html>',
    # '<html' or '<' here, but both branches returned the same value
    # (text.strip()), so the conditional was dead code and has been removed.
    return text.strip()
|
81 |
-
|
82 |
-
def clear_history():
    """Reset all chat-related UI state.

    Returns empty tuple-format history, empty chatbot messages, no uploaded
    file (None), and an empty website-URL string, in that order.
    """
    return [], [], None, ""
|
84 |
-
|
85 |
-
def update_image_input_visibility(model):
    """Show the image input only for the vision-capable models."""
    vision_model_ids = {
        "baidu/ERNIE-4.5-VL-424B-A47B-Base-PT",
        "THUDM/GLM-4.1V-9B-Thinking",
    }
    return gr.update(visible=model.get("id") in vision_model_ids)
|
90 |
-
|
91 |
-
def update_submit_button(query):
    """Keep the submit button disabled while the query box is empty."""
    has_text = bool(query)
    return gr.update(interactive=has_text)
|
94 |
-
|
95 |
-
def create_multimodal_message(text, image=None):
    """Build a user chat message; attaches *image* as an image_url part when given."""
    if image is None:
        return {"role": "user", "content": text}

    # NOTE(review): import kept function-local as in the original —
    # presumably to avoid an import cycle with file_processing; confirm.
    from file_processing import process_image_for_model

    parts = [
        {"type": "text", "text": text},
        {
            "type": "image_url",
            "image_url": {"url": process_image_for_model(image)},
        },
    ]
    return {"role": "user", "content": parts}
|
115 |
-
def apply_search_replace_changes(original_html: str, changes_text: str) -> str:
    """Apply search/replace changes to HTML content.

    *changes_text* is expected to contain one or more blocks delimited by the
    SEARCH_START / DIVIDER / REPLACE_END marker lines (from config). Each
    block's search text is replaced by its replacement text in
    *original_html*. Blocks whose search text is not found are skipped with a
    console warning. Returns the (possibly modified) HTML.
    """
    # No changes supplied: nothing to do.
    if not changes_text.strip():
        return original_html

    # Pass 1: split the changes text into individual search/replace blocks.
    # A block runs from a SEARCH_START line through its REPLACE_END line;
    # any text before a SEARCH_START (or after the last REPLACE_END) is kept
    # as its own block and later ignored by the parser below.
    blocks = []
    current_block = ""
    lines = changes_text.split('\n')

    for line in lines:
        if line.strip() == SEARCH_START:
            # Starting a new block; flush any accumulated text first.
            if current_block.strip():
                blocks.append(current_block.strip())
            current_block = line + '\n'
        elif line.strip() == REPLACE_END:
            # Block complete — include the terminator line and flush.
            current_block += line + '\n'
            blocks.append(current_block.strip())
            current_block = ""
        else:
            current_block += line + '\n'

    # Flush a trailing, unterminated block (best-effort).
    if current_block.strip():
        blocks.append(current_block.strip())

    modified_html = original_html

    # Pass 2: parse each block into search/replace halves and apply it.
    for block in blocks:
        if not block.strip():
            continue

        # Parse the search/replace block using two state flags: lines between
        # SEARCH_START and DIVIDER are the search text, lines between DIVIDER
        # and REPLACE_END are the replacement text.
        lines = block.split('\n')
        search_lines = []
        replace_lines = []
        in_search = False
        in_replace = False

        for line in lines:
            if line.strip() == SEARCH_START:
                in_search = True
                in_replace = False
            elif line.strip() == DIVIDER:
                in_search = False
                in_replace = True
            elif line.strip() == REPLACE_END:
                in_replace = False
            elif in_search:
                search_lines.append(line)
            elif in_replace:
                replace_lines.append(line)

        # Apply the search/replace (replaces every occurrence, not just the
        # first — str.replace has no count limit here).
        if search_lines:
            search_text = '\n'.join(search_lines).strip()
            replace_text = '\n'.join(replace_lines).strip()

            if search_text in modified_html:
                modified_html = modified_html.replace(search_text, replace_text)
            else:
                print(f"Warning: Search text not found in HTML: {search_text[:100]}...")

    return modified_html
|
178 |
-
|
179 |
-
def send_to_sandbox(code):
    """Render *code* inside a sandboxed iframe via a base64 data: URI.

    The wrapper supplies a complete HTML document, an in-memory localStorage
    polyfill (defined and force-installed on window), and a window.onerror
    hook that logs script errors to the console.
    """
    wrapped_code = f"""
    <!DOCTYPE html>
    <html>
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <script>
            // Safe localStorage polyfill
            const safeStorage = {{
                _data: {{}},
                getItem: function(key) {{ return this._data[key] || null; }},
                setItem: function(key, value) {{ this._data[key] = value; }},
                removeItem: function(key) {{ delete this._data[key]; }},
                clear: function() {{ this._data = {{}}; }}
            }};
            Object.defineProperty(window, 'localStorage', {{
                value: safeStorage,
                writable: false
            }});
            window.onerror = function(message, source, lineno, colno, error) {{
                console.error('Error:', message);
            }};
        </script>
    </head>
    <body>
        {code}
    </body>
    </html>
    """
    payload = base64.b64encode(wrapped_code.encode('utf-8')).decode('utf-8')
    data_uri = f"data:text/html;charset=utf-8;base64,{payload}"
    return (
        f'<iframe src="{data_uri}" width="100%" height="920px" '
        'sandbox="allow-scripts allow-same-origin allow-forms allow-popups '
        'allow-modals allow-presentation" allow="display-capture"></iframe>'
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
core.py
ADDED
@@ -0,0 +1,122 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# /core.py
|
2 |
+
|
3 |
+
"""
|
4 |
+
Core business logic for the code generation application.
|
5 |
+
|
6 |
+
This module orchestrates the entire process from receiving a user query to
|
7 |
+
generating the final code. It interacts with the services, extractors, and
|
8 |
+
utility modules to fulfill the request.
|
9 |
+
"""
|
10 |
+
from typing import Dict, List, Optional, Tuple, Generator, Any
|
11 |
+
|
12 |
+
from config import (
    HTML_SYSTEM_PROMPT, GENERIC_SYSTEM_PROMPT,
    HTML_SYSTEM_PROMPT_WITH_SEARCH, GENERIC_SYSTEM_PROMPT_WITH_SEARCH,
    FOLLOW_UP_SYSTEM_PROMPT, SEARCH_START, DIVIDER
)
from services import llm_service, search_service
from extractor import extract_text_from_file, extract_website_content
from utils import (
    history_to_messages, remove_code_block, process_image_for_model,
    apply_search_replace
)
|
23 |
+
|
24 |
+
# --- Type Definitions ---
# A conversation as (user_message, assistant_message) pairs; either element
# may be None (e.g. a turn still in progress), hence the Optional parts.
History = List[Tuple[Optional[str], Optional[str]]]
|
26 |
+
|
27 |
+
def _determine_system_prompt(language: str, enable_search: bool, history: History) -> Tuple[str, bool]:
    """Pick the system prompt for this turn.

    Returns (prompt, is_follow_up). Follow-up mode is detected when the most
    recent assistant reply already contains an HTML document, in which case
    the search/replace follow-up prompt is used instead of a generation one.
    """
    last_reply = history[-1][1] if history else None
    if last_reply and ("<!DOCTYPE html>" in last_reply or "<html" in last_reply):
        return FOLLOW_UP_SYSTEM_PROMPT, True

    if language == "html":
        if enable_search:
            return HTML_SYSTEM_PROMPT_WITH_SEARCH, False
        return HTML_SYSTEM_PROMPT, False

    template = GENERIC_SYSTEM_PROMPT_WITH_SEARCH if enable_search else GENERIC_SYSTEM_PROMPT
    return template.format(language=language), False
|
40 |
+
|
41 |
+
def _prepare_user_content(
|
42 |
+
query: str, image_data: Optional[Any], file_path: Optional[str],
|
43 |
+
website_url: Optional[str], enable_search: bool
|
44 |
+
) -> any:
|
45 |
+
"""Constructs the final user prompt including context from files, web, and search."""
|
46 |
+
context_parts = [query]
|
47 |
+
|
48 |
+
if file_path:
|
49 |
+
file_text = extract_text_from_file(file_path)
|
50 |
+
context_parts.append(f"\n\n--- Reference File Content ---\n{file_text[:8000]}")
|
51 |
+
|
52 |
+
if website_url:
|
53 |
+
web_text = extract_website_content(website_url)
|
54 |
+
context_parts.append(f"\n\n--- Website Content for Redesign ---\n{web_text[:8000]}")
|
55 |
+
|
56 |
+
full_query = "".join(context_parts)
|
57 |
+
|
58 |
+
if enable_search and search_service.is_available():
|
59 |
+
search_results = search_service.search(full_query)
|
60 |
+
full_query += f"\n\n--- Web Search Results ---\n{search_results}"
|
61 |
+
|
62 |
+
if image_data is not None:
|
63 |
+
return [
|
64 |
+
{"type": "text", "text": full_query},
|
65 |
+
{"type": "image_url", "image_url": {"url": process_image_for_model(image_data)}}
|
66 |
+
]
|
67 |
+
return full_query
|
68 |
+
|
69 |
+
|
70 |
+
def generate_code(
    query: str,
    image_data: Optional[Any],
    file_path: Optional[str],
    website_url: Optional[str],
    history: History,
    model_config: Dict[str, str],
    enable_search: bool,
    language: str
) -> Generator[Dict[str, Any], None, None]:
    """
    Main generator function to handle a user request and stream responses.

    Yields dicts with a "code_output" key while the LLM streams; the final
    yield additionally carries the updated "history". Note that *history* is
    mutated in place: the new (query, final_code) pair is appended.

    Fix: this function references DIVIDER, which was never imported from
    config (latent NameError on the follow-up path); the module's config
    import now includes it.
    """
    system_prompt, is_follow_up = _determine_system_prompt(language, enable_search, history)
    messages = history_to_messages(history, system_prompt)
    user_content = _prepare_user_content(query, image_data, file_path, website_url, enable_search)
    messages.append({'role': 'user', 'content': user_content})

    # Hoisted out of the streaming loop: the baseline HTML cannot change
    # while a single response streams.
    last_html = history[-1][1] if history and history[-1][1] else ""

    content_stream = ""
    stream = llm_service.generate_code_stream(model_config['id'], messages)

    for chunk in stream:
        content_stream += chunk

        if is_follow_up:
            # Follow-ups stream search/replace blocks. Only apply them once
            # every opened block looks complete, i.e. the marker counts match.
            if SEARCH_START in content_stream and content_stream.count(SEARCH_START) == content_stream.count(DIVIDER):
                processed_code = apply_search_replace(last_html, content_stream)
            else:
                # Partial block: show the raw diff so the user sees progress.
                processed_code = f"Applying Changes:\n\n{content_stream}"
        else:
            processed_code = remove_code_block(content_stream)

        yield {"code_output": processed_code}

    # Final processing after the stream ends.
    if is_follow_up:
        final_code = apply_search_replace(last_html, content_stream)
    else:
        final_code = remove_code_block(content_stream)

    # Record the original (un-augmented) query for cleaner history display.
    history.append((query, final_code))

    yield {"code_output": final_code, "history": history}
|