Update app.py
app.py CHANGED
@@ -1,8 +1,9 @@
- # === Gradio Demo App: gradio_app.py ===
# This script creates a user-friendly web interface to demonstrate the
# multimodal moderation capabilities of the main FastAPI server.
#
# It interacts with the /v3/moderations endpoint.
# --------------------------------------------------------------------

import base64
@@ -19,11 +20,9 @@ from dotenv import load_dotenv
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
load_dotenv()

-
- # It's crucial to set this in your .env file for deployment.
- API_BASE_URL = os.environ.get("API_BASE_URL", "")
MODERATION_ENDPOINT = f"{API_BASE_URL}/v3/moderations"
-
# --- Full list of Whisper V3 supported languages ---
# Mapping user-friendly names to ISO 639-1 codes
WHISPER_LANGUAGES = {
@@ -48,33 +47,27 @@ WHISPER_LANGUAGES = {
    "Tagalog": "tl", "Malagasy": "mg", "Assamese": "as", "Tatar": "tt", "Hawaiian": "haw",
    "Lingala": "ln", "Hausa": "ha", "Bashkir": "ba", "Javanese": "jw", "Sundanese": "su",
}
- # Sort languages alphabetically for the dropdown
SORTED_LANGUAGES = dict(sorted(WHISPER_LANGUAGES.items()))
-
-
- # --- Helper Function ---
def file_to_base64(filepath: str) -> str:
-
-     if not filepath:
-         return None
    try:
        with open(filepath, "rb") as f:
-
-             return encoded_string
    except Exception as e:
        logging.error(f"Failed to convert file {filepath} to base64: {e}")
        return None
-
- #
def moderate_content(text_input, image_input, video_input, audio_input, language_full_name):
-     """
-     Prepares the payload, calls the moderation API, and formats the response.
-     """
    if not any([text_input, image_input, video_input, audio_input]):
-         return "Please provide at least one input (text, image, video, or audio).", None
-
    logging.info("Preparing payload for moderation API...")
-     payload = {
    if text_input: payload["input"] = text_input
    if image_b64 := file_to_base64(image_input): payload["image"] = image_b64
    if video_b64 := file_to_base64(video_input): payload["video"] = video_b64
@@ -83,154 +76,127 @@ def moderate_content(text_input, image_input, video_input, audio_input, language
        language_code = SORTED_LANGUAGES.get(language_full_name, "en")
        payload["language"] = language_code
        logging.info(f"Audio detected. Using language: {language_full_name} ({language_code})")
-
    logging.info(f"Sending request to {MODERATION_ENDPOINT} with inputs: {list(payload.keys())}")
-
-     summary_output = "An error occurred. Please check the logs."
-     full_response_output = {}
    latency_ms = None
-
    try:
        with httpx.Client(timeout=180.0) as client:
-             start_time = time.monotonic()
            response = client.post(MODERATION_ENDPOINT, json=payload)
            latency_ms = (time.monotonic() - start_time) * 1000
            logging.info(f"API response received in {latency_ms:.2f} ms with status code {response.status_code}")
-
            response.raise_for_status()
-
            data = response.json()
-             full_response_output = data  # <-- MODIFIED: Assign raw data, without adding latency
-
            if not data.get("results"):
-
-                 return summary_output, full_response_output
-
            result = data["results"][0]
-
-
-             reason = result.get("reason") or "
-             transcribed = result.get("transcribed_text") or "
            flagged_categories = [cat for cat, flagged in result.get("categories", {}).items() if flagged]
            categories_str = ", ".join(flagged_categories) if flagged_categories else "None"
-
-             summary_output = f"""
-             **API Latency:** {latency_ms:.2f} ms
-             ---
-             **Moderation Status:** {status}
-             ---
-             **Reason:** {reason}
-             ---
-             **Flagged Categories:** {categories_str}
-             ---
-             **Transcribed Text (from audio):**
-             {transcribed}
-             """
            logging.info("Successfully parsed moderation response.")
-
    except httpx.HTTPStatusError as e:
-
-         error_details = ""
-         latency_str = f"**API Latency:** {latency_ms:.2f} ms" if latency_ms is not None else ""
-
        try:
            error_json = e.response.json()
            detail = error_json.get("detail", "No specific error detail provided.")
-             error_details = f"
-
-             full_response_output = {"error": "Backend API Error", "status_code": e.response.status_code, "details": error_json}
        except (json.JSONDecodeError, AttributeError):
-             error_details = f"
-
-             full_response_output = {"error": "Backend API Error", "status_code": e.response.status_code, "details": e.response.text}
-
-         summary_output = f"""
-         **🚫 Error from Moderation Service (HTTP {e.response.status_code})**
-         ---
-         {latency_str}
-
-         {user_message}
-
-         {error_details}
-         """
        logging.error(f"HTTP Status Error: {e.response.status_code} - Response: {e.response.text}")
-
    except httpx.RequestError as e:
-
-
-
-         summary_output = f"""
-         **🔌 Connection Error**
-         ---
-         Could not connect to the API server at `{API_BASE_URL}`. The request failed after {latency_ms:.0f} ms.
-
-         Please ensure the backend server is running and the URL is configured correctly in your `.env` file.
-         """
-         # <-- MODIFIED: Latency removed from this dictionary
-         full_response_output = {"error": "Connection Error", "url": API_BASE_URL, "details": str(e)}
        logging.error(f"Request Error: Could not connect to {API_BASE_URL}. Details: {e}")
-
    except Exception as e:
-         summary_output = f"""
-         **💥 An Unexpected Application Error Occurred**
-         ---
-         An error happened within the Gradio application itself.
-         Please check the application logs for more details.
-
-         **Error Type:** `{type(e).__name__}`
-         """
-         full_response_output = {"error": "Gradio App Internal Error", "type": type(e).__name__, "details": str(e)}
        logging.error(f"Unexpected Error in Gradio App: {e}", exc_info=True)
-
-     return summary_output, full_response_output

# --- Gradio Interface ---
- with gr.Blocks(theme=gr.themes.Soft(), css="footer {display: none !important}") as demo:
    gr.Markdown(
        """
        # 🤖 Multimodal Content Moderation Demo
-         This
-
        """
    )
-
-     with gr.Row():
-         with gr.Column(scale=1):
-             gr.Markdown("### 1. Provide Your Content")
-             text_input = gr.Textbox(label="Text Input", lines=4, placeholder="Enter any text here...")
-             image_input = gr.Image(label="Image Input", type="filepath")
-             video_input = gr.Video(label="Video Input")
-             audio_input = gr.Audio(label="Voice/Audio Input", type="filepath")
-
-             language_input = gr.Dropdown(
-                 label="Audio Language (if providing audio)",
-                 choices=list(SORTED_LANGUAGES.keys()),
-                 value="English",
-                 interactive=True
-             )
-
-             submit_button = gr.Button("Moderate Content", variant="primary")
-
        with gr.Column(scale=2):
-             gr.Markdown("###
-
-
-
-
-
-
-
-

    gr.Examples(
        examples=[
-             ["
-             ["I
        ],
        inputs=[text_input, image_input, video_input, audio_input, language_input],
-
-
    )

if __name__ == "__main__":
    logging.info(f"Connecting to API server at: {API_BASE_URL}")
+ # === Gradio Demo App: gradio_app.py (Backward-Compatible Version) ===
# This script creates a user-friendly web interface to demonstrate the
# multimodal moderation capabilities of the main FastAPI server.
#
# It interacts with the /v3/moderations endpoint.
+ # NOTE: This version removes the "Copy" button for compatibility with older Gradio versions.
# --------------------------------------------------------------------

import base64
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
load_dotenv()

+ API_BASE_URL = os.environ.get("API_BASE_URL", "http://127.0.0.1:8000")
MODERATION_ENDPOINT = f"{API_BASE_URL}/v3/moderations"
+ # ... (rest of the configuration and helper functions remain the same) ...
# --- Full list of Whisper V3 supported languages ---
# Mapping user-friendly names to ISO 639-1 codes
WHISPER_LANGUAGES = {
    "Tagalog": "tl", "Malagasy": "mg", "Assamese": "as", "Tatar": "tt", "Hawaiian": "haw",
    "Lingala": "ln", "Hausa": "ha", "Bashkir": "ba", "Javanese": "jw", "Sundanese": "su",
}
SORTED_LANGUAGES = dict(sorted(WHISPER_LANGUAGES.items()))
def file_to_base64(filepath: str) -> str:
+     if not filepath: return None
    try:
        with open(filepath, "rb") as f:
+             return base64.b64encode(f.read()).decode("utf-8")
    except Exception as e:
        logging.error(f"Failed to convert file {filepath} to base64: {e}")
        return None
+ def create_status_banner(status_type, text):
+     colors = {"safe": ("#DFF2BF", "#4F8A10"), "flagged": ("#FFD2D2", "#D8000C"), "error": ("#FEEFB3", "#9F6000"), "info": ("#BDE5F8", "#00529B")}
+     bg_color, text_color = colors.get(status_type, ("#E0E0E0", "#000000"))
+     return f"<div style='background-color:{bg_color}; padding: 1rem; border-radius: 8px; margin-bottom: 1rem; border: 1px solid {text_color};'><h2 style='color:{text_color}; text-align:center; margin:0; font-size: 1.5rem;'>{text}</h2></div>"
+ def clear_outputs():
+     initial_text = "Results will appear here after submission."
+     return (create_status_banner("info", "SUBMIT CONTENT FOR MODERATION"), "N/A", initial_text, initial_text, initial_text, None)
def moderate_content(text_input, image_input, video_input, audio_input, language_full_name):
    if not any([text_input, image_input, video_input, audio_input]):
+         return (create_status_banner("error", "🚫 NO INPUT PROVIDED 🚫"), "N/A", "Please provide at least one input (text, image, video, or audio) before submitting.", "N/A", "N/A", None)
    logging.info("Preparing payload for moderation API...")
+     payload = {"model": "nai-moderation-latest"}
    if text_input: payload["input"] = text_input
    if image_b64 := file_to_base64(image_input): payload["image"] = image_b64
    if video_b64 := file_to_base64(video_input): payload["video"] = video_b64
        language_code = SORTED_LANGUAGES.get(language_full_name, "en")
        payload["language"] = language_code
        logging.info(f"Audio detected. Using language: {language_full_name} ({language_code})")
    logging.info(f"Sending request to {MODERATION_ENDPOINT} with inputs: {list(payload.keys())}")
    latency_ms = None
+     start_time = time.monotonic()
    try:
        with httpx.Client(timeout=180.0) as client:
            response = client.post(MODERATION_ENDPOINT, json=payload)
            latency_ms = (time.monotonic() - start_time) * 1000
            logging.info(f"API response received in {latency_ms:.2f} ms with status code {response.status_code}")
            response.raise_for_status()
            data = response.json()
            if not data.get("results"):
+                 return (create_status_banner("error", "EMPTY API RESPONSE"), f"{latency_ms:.2f} ms", "The API returned an empty result. This can happen if media processing fails (e.g., a video with no valid frames).", "N/A", "N/A", data)
            result = data["results"][0]
+             status_text, status_type = ("🚨 FLAGGED 🚨", "flagged") if result["flagged"] else ("✅ SAFE ✅", "safe")
+             status_banner = create_status_banner(status_type, status_text)
+             reason = result.get("reason") or "No specific reason provided."
+             transcribed = result.get("transcribed_text") or "No audio was provided or transcription was not applicable."
            flagged_categories = [cat for cat, flagged in result.get("categories", {}).items() if flagged]
            categories_str = ", ".join(flagged_categories) if flagged_categories else "None"
            logging.info("Successfully parsed moderation response.")
+             return (status_banner, f"{latency_ms:.2f} ms", reason, categories_str, transcribed, data)
    except httpx.HTTPStatusError as e:
+         latency_str = f"{latency_ms:.2f} ms" if latency_ms is not None else "N/A"
+         full_response, error_details = {}, ""
        try:
            error_json = e.response.json()
            detail = error_json.get("detail", "No specific error detail provided.")
+             error_details = f"Server responded with error: {detail}"
+             full_response = {"error": "Backend API Error", "status_code": e.response.status_code, "details": error_json}
        except (json.JSONDecodeError, AttributeError):
+             error_details = f"Could not decode the server's error response:\n{e.response.text}"
+             full_response = {"error": "Backend API Error", "status_code": e.response.status_code, "details": e.response.text}
        logging.error(f"HTTP Status Error: {e.response.status_code} - Response: {e.response.text}")
+         return (create_status_banner("error", f"🚫 API ERROR (HTTP {e.response.status_code}) 🚫"), latency_str, error_details, "N/A", "N/A", full_response)
    except httpx.RequestError as e:
+         latency_ms = (time.monotonic() - start_time) * 1000
+         error_msg = f"Could not connect to the API server at `{API_BASE_URL}`. Please ensure the backend server is running and the URL is correctly configured."
        logging.error(f"Request Error: Could not connect to {API_BASE_URL}. Details: {e}")
+         return (create_status_banner("error", "🔌 CONNECTION ERROR 🔌"), f"{latency_ms:.0f} ms", error_msg, "N/A", "N/A", {"error": "Connection Error", "url": API_BASE_URL, "details": str(e)})
    except Exception as e:
        logging.error(f"Unexpected Error in Gradio App: {e}", exc_info=True)
+         return (create_status_banner("error", "💥 UNEXPECTED APP ERROR 💥"), "N/A", f"An unexpected error occurred within the Gradio application itself: {type(e).__name__}", "N/A", "N/A", {"error": "Gradio App Internal Error", "type": type(e).__name__, "details": str(e)})

# --- Gradio Interface ---
+ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky"), css="footer {display: none !important}") as demo:
    gr.Markdown(
        """
        # 🤖 Multimodal Content Moderation Demo
+         This interface demonstrates a powerful, multi-input moderation API.
+         Provide any combination of text, image, video, and audio. The system will analyze all inputs together for a comprehensive result.
        """
    )
+     with gr.Row(variant="panel"):
        with gr.Column(scale=2):
+             gr.Markdown("### 1. Provide Your Content")
+             with gr.Tabs():
+                 with gr.TabItem("📝 Text"):
+                     text_input = gr.Textbox(label="Text Input", lines=8, placeholder="Enter any text here...")
+                 with gr.TabItem("🖼️ Image"):
+                     image_input = gr.Image(label="Image Input", type="filepath")
+                 with gr.TabItem("🎬 Video"):
+                     video_input = gr.Video(label="Video Input")
+                 with gr.TabItem("🎤 Audio"):
+                     audio_input = gr.Audio(label="Voice/Audio Input", type="filepath")
+                     language_input = gr.Dropdown(label="Audio Language", choices=list(SORTED_LANGUAGES.keys()), value="English", interactive=True)
+             with gr.Row():
+                 clear_button = gr.Button("Clear All")
+                 submit_button = gr.Button("▶️ Moderate Content", variant="primary")
+         with gr.Column(scale=3):
+             gr.Markdown("### 2. Moderation Results")
+             status_output = gr.Markdown(value=create_status_banner("info", "AWAITING SUBMISSION"))
+             with gr.Group():
+                 with gr.Row():
+                     latency_output = gr.Textbox(label="⏱️ API Latency", interactive=False)
+                     categories_output = gr.Textbox(label="🏷️ Flagged Categories", interactive=False)
+                 reason_output = gr.Textbox(label="✍️ Reason", interactive=False, lines=2)
+                 # MODIFICATION: The Copy button and its surrounding Row have been removed.
+                 transcription_output = gr.Textbox(label="🎤 Transcribed Text (from audio)", interactive=False, lines=4)
+             with gr.Accordion("Full API Response (JSON)", open=False):
+                 full_response_output = gr.JSON(label="Raw JSON Response")

+     demo.load(fn=clear_outputs, inputs=None, outputs=[status_output, latency_output, reason_output, categories_output, transcription_output, full_response_output])
+
+     gr.Markdown("---")
+     gr.Markdown(
+         """
+         ### 💡 Quick Examples
+         <p style='color: #666; font-size: 0.9rem;'>
+         <b>⚠️ Content Warning:</b> The examples below include text that may be offensive or disturbing (e.g., hate speech, violence, sexual content).
+         They are provided solely to demonstrate the capabilities of the moderation model.
+         </p>
+         """
+     )
    gr.Examples(
        examples=[
+             ["The sun is shining and the birds are singing. It's a beautiful day for a walk in the park.", None, None, None, "English"],
+             ["I'm going to kill the process on my computer because it's using too much memory.", None, None, None, "English"],
+             ["If you don't give me what I want, I will hunt you down and hurt you.", None, None, None, "English"],
+             ["I can't stand people from that country, they are all lazy and untrustworthy.", None, None, None, "English"],
+             ["I feel so hopeless and alone. I don't see the point in going on anymore.", None, None, None, "English"],
+             ["Looking for a partner for some wild, no-strings-attached fun tonight. Must be over 18.", None, None, None, "English"],
+             ["She looks so young and innocent in that picture, I love it.", None, None, None, "English"],
        ],
        inputs=[text_input, image_input, video_input, audio_input, language_input],
+         fn=moderate_content,
+         outputs=[status_output, latency_output, reason_output, categories_output, transcription_output, full_response_output],
+         cache_examples=False,
+     )
+
+     # --- Event Handlers (Backward-Compatible) ---
+     all_inputs = [text_input, image_input, video_input, audio_input, language_input]
+     all_outputs = [status_output, latency_output, reason_output, categories_output, transcription_output, full_response_output]
+
+     submit_button.click(fn=moderate_content, inputs=all_inputs, outputs=all_outputs)
+     clear_button.click(
+         fn=lambda: (None, None, None, None, *clear_outputs()),
+         inputs=None,
+         outputs=[text_input, image_input, video_input, audio_input, *all_outputs],
+         queue=False
    )
+     # MODIFICATION: The copy_button.click() handler has been removed entirely.

if __name__ == "__main__":
    logging.info(f"Connecting to API server at: {API_BASE_URL}")
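
For quick local testing of the endpoint this demo drives, a minimal sketch of a direct call is shown below. It assumes the FastAPI backend from this project is reachable at the default URL configured above (http://127.0.0.1:8000, overridable via a .env file read by load_dotenv()); the payload keys, sample text, and response fields mirror what moderate_content() sends and reads, and the variable names are illustrative only, not part of the committed code.

    # Hypothetical smoke test for the /v3/moderations endpoint used by this demo.
    # Assumes the backend is running at the same default URL as app.py.
    import httpx

    API_BASE_URL = "http://127.0.0.1:8000"   # override via .env for deployments
    ENDPOINT = f"{API_BASE_URL}/v3/moderations"

    payload = {
        "model": "nai-moderation-latest",     # model name as sent by the app
        "input": "If you don't give me what I want, I will hunt you down and hurt you.",
    }
    # Optional media are sent base64-encoded, exactly as file_to_base64() does, e.g.:
    # payload["image"] = base64.b64encode(open("sample.jpg", "rb").read()).decode("utf-8")
    # payload["audio"] = "..."; payload["language"] = "en"

    with httpx.Client(timeout=180.0) as client:
        response = client.post(ENDPOINT, json=payload)
        response.raise_for_status()
        result = response.json()["results"][0]

    print("flagged:", result["flagged"])
    print("reason:", result.get("reason"))
    print("flagged categories:", [c for c, hit in result.get("categories", {}).items() if hit])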