chandru1652 commited on
Commit
81cdd5f
·
1 Parent(s): fdee41f

Initial public commit

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +4 -0
  2. .gitignore +6 -0
  3. Dockerfile +90 -0
  4. README.md +15 -5
  5. backend/app.py +143 -0
  6. backend/background_task_manager.py +76 -0
  7. backend/cache_manager.py +67 -0
  8. backend/case_util.py +223 -0
  9. backend/config.py +38 -0
  10. backend/data/reports/1.txt +1 -0
  11. backend/data/reports/2.txt +1 -0
  12. backend/data/reports_manifest.csv +3 -0
  13. backend/data/who_chestxray_guideline_9241546778_eng.pdf +3 -0
  14. backend/default_cache/README.md +1 -0
  15. backend/default_cache/rad-learn-cache.zip +3 -0
  16. backend/llm_client.py +291 -0
  17. backend/models.py +108 -0
  18. backend/prompts.py +165 -0
  19. backend/rag/__init__.py +0 -0
  20. backend/rag/knowledge_base.py +568 -0
  21. backend/rag/model_manager.py +102 -0
  22. backend/rag/rag_context_engine.py +226 -0
  23. backend/rag/siglip_embedder.py +59 -0
  24. backend/requirements.txt +47 -0
  25. backend/routes.py +218 -0
  26. frontend/index.html +29 -0
  27. frontend/package.json +22 -0
  28. frontend/public/index.html +35 -0
  29. frontend/public/vite.svg +0 -0
  30. frontend/src/App.css +90 -0
  31. frontend/src/App.jsx +107 -0
  32. frontend/src/assets/home_chest_logo.jpg +0 -0
  33. frontend/src/components/ChatMessage.jsx +40 -0
  34. frontend/src/components/ChatMessage.module.css +70 -0
  35. frontend/src/components/DetailsOverlay.jsx +122 -0
  36. frontend/src/components/DetailsOverlay.module.css +143 -0
  37. frontend/src/components/JourneyCard.jsx +36 -0
  38. frontend/src/components/JourneyCard.module.css +80 -0
  39. frontend/src/components/MCQOption.jsx +41 -0
  40. frontend/src/components/MCQOption.module.css +56 -0
  41. frontend/src/components/RedactedTextView.js +42 -0
  42. frontend/src/components/TextWithTooltips.jsx +47 -0
  43. frontend/src/data/constants.js +23 -0
  44. frontend/src/data/medicalTerms.js +32 -0
  45. frontend/src/icons/IconArticlePerson.jsx +38 -0
  46. frontend/src/icons/IconAstrophotography.jsx +31 -0
  47. frontend/src/icons/IconBackArrow.jsx +35 -0
  48. frontend/src/icons/IconClose.jsx +28 -0
  49. frontend/src/icons/IconCodeBlocks.jsx +37 -0
  50. frontend/src/icons/IconGemma.jsx +50 -0
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ backend/data/images/ filter=lfs diff=lfs merge=lfs -text
37
+ backend/data/images/1.png filter=lfs diff=lfs merge=lfs -text
38
+ backend/data/images/2.png filter=lfs diff=lfs merge=lfs -text
39
+ backend/data/who_chestxray_guideline_9241546778_eng.pdf filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ .env
2
+ .venv/
3
+ .idea/*
4
+ .DS_Store
5
+ /frontend/node_modules/
6
+ /frontend/package-lock.json
Dockerfile ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# --- Stage 1: Build the React Frontend ---
FROM node:20-slim AS frontend-builder
WORKDIR /app
# Add a build argument to force a rebuild (and not use cache) when new code is pushed
ARG CACHE_BUSTER=1
COPY frontend/package.json ./
RUN npm install
COPY frontend/ .
RUN npm run build

# --- Stage 2: Build the Final Production Image with Flask ---
FROM python:3.10-slim
ENV PYTHONUNBUFFERED=1
ENV CACHE_DIR=/data/cache

# Set the NLTK data path environment variable.
# This tells NLTK where to look for data for ALL users.
ENV NLTK_DATA=/usr/local/share/nltk_data

# Install system dependencies first, as they change less frequently.
# Clean the apt lists in the same layer so they don't bloat the image.
RUN apt-get update && \
    apt-get install -y unzip --no-install-recommends && \
    rm -rf /var/lib/apt/lists/*

RUN useradd -m -s /bin/bash -u 1000 user
WORKDIR /app

# Copy and install Python requirements from the backend folder
COPY --chown=user:user backend/requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Download the required NLTK data.
# This command downloads it to the directory specified by $NLTK_DATA.
RUN python -m nltk.downloader -d $NLTK_DATA punkt punkt_tab

# Copy the entire backend application code
COPY --chown=user:user backend/ .

# Copy the built frontend from the first stage into the correct directory
COPY --chown=user:user --from=frontend-builder /app/dist ./frontend/dist

# Create the cache directory and make it writable in a single layer
RUN mkdir -p $CACHE_DIR && chmod -R 777 $CACHE_DIR

# Define the path to your potential zip file
# (use the ENV key=value form; the space-separated form is deprecated)
ENV ZIP_FILE_PATH=./default_cache/rad-learn-cache.zip

# Conditionally unzip the file
RUN if [ -f "$ZIP_FILE_PATH" ]; then \
        unzip -o "$ZIP_FILE_PATH" -d $CACHE_DIR && \
        chmod -R 777 $CACHE_DIR && \
        rm "$ZIP_FILE_PATH"; \
    fi

# Writable application directories for RAG artifacts, owned by the app user
RUN mkdir -p /app/persistent_cache \
             /app/processed_figures_kb \
             /app/chroma_db_store && \
    chown user:user /app/persistent_cache \
                    /app/processed_figures_kb \
                    /app/chroma_db_store

# Switch to the non-root user for security
USER user

# Expose the port
EXPOSE 7860

# Run the production server
CMD ["gunicorn", \
     "--bind", "0.0.0.0:7860", \
     "--timeout", "600", \
     "--worker-class", "gthread", \
     "--workers", "1", \
     "--threads", "4", \
     "app:app"]
README.md CHANGED
@@ -1,12 +1,22 @@
1
  ---
2
- title: Rad Learning Companion
3
  emoji: 🏃
4
- colorFrom: red
5
- colorTo: red
6
  sdk: docker
7
  pinned: false
8
  license: apache-2.0
9
- short_description: Radiology Learning Companion Demo - built with MedGemma
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Radiology Learning Companion
3
  emoji: 🏃
4
+ colorFrom: indigo
5
+ colorTo: indigo
6
  sdk: docker
7
  pinned: false
8
  license: apache-2.0
9
+ short_description: A demo showcasing a medical learning experience of CXR image
10
  ---
11
 
12
+ **Radiology Learning Companion Demo Built with MedGemma**
13
+
14
+ Imagine a learning environment where interacting directly with a Chest X-Ray (CXR) image significantly boosts your understanding. That's precisely what the Radiology Learning Companion Demo offers. This web application is an interactive educational tool tailored for medical students to hone their radiological assessment skills for CXRs.
15
+
16
+ Radiology Learning Companion Demo demonstrates how to harness MedGemma's multimodal capabilities, combining medical image interpretation and robust medical reasoning. In this demo, users start by selecting an image from a library of two CXRs; developers can build their own library of images. The demo uses MedGemma's internal radiological assessment hypothesis and relevant clinical guidelines, and presents the user with a series of targeted multiple-choice questions.
17
+
18
+ After the user goes through their learning journey, Radiology Learning Companion Demo reveals its own interpretation, providing a clear rationale based on CXR findings and established guidelines. It then offers a comparative analysis against the user's responses, designed to deepen their understanding and validate their clinical observations.
19
+
20
+ You as a developer can use this approach to include other guidelines using RAG or other prompts and context to tailor and build such a learning companion.
21
+
22
+ *Note: This demo utilizes non-DICOM Chest X-Ray (CXR) images, each paired with a curated single condition label. Our labeling strategy prioritizes educationally relevant findings to power a focused and effective simulated learning experience for demo purpose only. This demonstration is solely for illustrative purposes and doesn't represent a finished or approved product. It does not comply with any harmonized regulations or standards for quality, safety, or efficacy. Any real-world application would require further development, training, and adaptation.*
backend/app.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2025 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ import logging
17
+ import os
18
+ import sys
19
+
20
+ from flask import Flask, send_from_directory
21
+
22
+ import case_util
23
+ import config
24
+ from llm_client import VertexAILLMClient
25
+ from llm_client import HuggingFaceLLMClient
26
+ from background_task_manager import BackgroundTaskManager
27
+ from cache_manager import CacheManager
28
+ from rag.knowledge_base import KnowledgeBase
29
+ from rag.model_manager import ModelManager
30
+ from rag.rag_context_engine import RAGContextEngine, format_context_messages_to_string
31
+ from routes import main_bp
32
+
33
+
34
def _get_llm_client():
    """Initializes the LLM client and handles exit on failure."""
    logger = logging.getLogger(__name__)

    location = config.MEDGEMMA_LOCATION
    if location == 'HUGGING_FACE':
        logger.info("HUGGING_FACE MedGemma end point initialized.")
        return HuggingFaceLLMClient(config.HF_TOKEN, config.MEDGEMMA_ENDPOINT_URL)
    if location == 'VERTEX_AI':
        logger.info("Vertex AI MedGemma end point initialized.")
        return VertexAILLMClient(config.GCLOUD_SA_KEY, config.MEDGEMMA_ENDPOINT_URL)

    # Any other (or missing) MEDGEMMA_LOCATION value is a fatal misconfiguration.
    logger.critical("LLM client failed to initialize. API calls will fail.")
    sys.exit("Exiting: LLM client initialization failed.")
47
+
48
def _initialize_rag_system(flask_app: Flask):
    """Checks for persistent cache and initializes the RAG system."""
    logger = logging.getLogger(__name__)
    rag_context_cache = {}

    # When caching is enabled and the persistent cache already has entries,
    # the expensive RAG build can be skipped entirely.
    if config.USE_CACHE:
        cache_manager = flask_app.config['DEMO_CACHE']
        if len(cache_manager.cache) > 0:
            logger.warning(f"The cache is not empty, so not initialising the RAG system.")
            return
        else:
            logger.info(f"The cache is empty, so resuming the RAG initialisation")

    try:
        logger.info("--- Initializing RAG System and pre-fetching context... ---")
        rag_model_manager = ModelManager()
        rag_models = rag_model_manager.load_models()
        if not rag_models.get("embedder"):
            raise RuntimeError("RAG embedder failed to load.")

        knowledge_base = KnowledgeBase(models=rag_models)
        knowledge_base.build(pdf_filepath=config.GUIDELINE_PDF_PATH)
        if not knowledge_base.retriever:
            raise RuntimeError("Failed to build the RAG retriever.")

        rag_engine = RAGContextEngine(knowledge_base=knowledge_base)

        # Pre-fetch guideline context for every case that has abnormal labels.
        for case_id, case_data in flask_app.config.get("AVAILABLE_REPORTS", {}).items():
            labels = case_data.ground_truth_labels
            if not labels:
                continue
            queries = [label.lower() for label in labels.keys()]
            if "normal" in queries:
                continue
            docs = rag_engine.retrieve_context_docs_for_simple_queries(queries)
            pages = {doc.metadata.get("page_number") for doc in docs if doc.metadata.get("page_number")}
            context_messages, _ = rag_engine.build_context_messages(docs)
            rag_context_cache[case_id] = {
                "context_string": format_context_messages_to_string(context_messages),
                "citations": sorted(pages),
            }

        logger.info("✅ RAG System ready.")
    except Exception as e:
        # RAG is essential for the demo; a partial startup would be worse
        # than failing fast here.
        logger.critical(f"FATAL: RAG System failed to initialize: {e}", exc_info=True)
        sys.exit("Exiting: RAG system initialization failed.")

    flask_app.config['RAG_CONTEXT_CACHE'] = rag_context_cache
93
+
94
+
95
def _initialize_demo_cache(flask_app: Flask):
    """Initializes the disk cache for MCQs and summary templates."""
    logger = logging.getLogger(__name__)

    # Guard clause: with caching off, expose an explicit None so callers
    # can distinguish "disabled" from "not yet configured".
    if not config.USE_CACHE:
        logger.warning("⚠️ Caching is DISABLED.")
        flask_app.config['DEMO_CACHE'] = None
        return

    cache_dir = os.getenv('CACHE_DIR', config.BASE_DIR / "persistent_cache")
    flask_app.config['DEMO_CACHE'] = CacheManager(cache_dir)
    logger.info("✅ Cache Setup Complete.")
106
+
107
+
108
def _register_routes(flask_app: Flask):
    """Registers blueprints and defines static file serving."""
    flask_app.register_blueprint(main_bp)

    @flask_app.route('/', defaults={'path': ''})
    @flask_app.route('/<path:path>')
    def serve(path):
        # Serve an existing static asset directly; every other path falls
        # through to the SPA entry point so client-side routing can handle it.
        candidate = os.path.join(flask_app.static_folder, path)
        if path and os.path.exists(candidate):
            return send_from_directory(flask_app.static_folder, path)
        return send_from_directory(flask_app.static_folder, 'index.html')
119
+
120
+
121
def create_app():
    """Creates and configures the Flask application by calling modular helper functions."""
    application = Flask(__name__, static_folder=config.STATIC_DIR)

    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - [%(name)s] - %(message)s')

    # Eager setup: LLM client, case manifest, and the demo cache.
    application.config["LLM_CLIENT"] = _get_llm_client()
    application.config["AVAILABLE_REPORTS"] = case_util.get_available_reports(config.MANIFEST_CSV_PATH)
    _initialize_demo_cache(application)

    # Deferred setup: RAG warm-up runs in the background so startup is not blocked.
    task_manager = BackgroundTaskManager()
    application.config['TASK_MANAGER'] = task_manager
    task_manager.start_task(key="rag_system", target_func=_initialize_rag_system, flask_app=application)

    _register_routes(application)
    return application
138
+
139
+
140
# Module-level WSGI entry point; gunicorn references this as "app:app".
app = create_app()

if __name__ == '__main__':
    # Development server only; production runs gunicorn (see Dockerfile CMD).
    app.run(host='0.0.0.0', port=7860, debug=True)
backend/background_task_manager.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2025 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import logging
16
+ import threading
17
+
18
logger = logging.getLogger(__name__)


class BackgroundTaskManager:
    """A simple manager to run and track background initialization tasks.

    Tasks run in daemon threads. Results and exceptions are recorded in
    dictionaries guarded by a single lock, so status checks are thread-safe.
    """

    def __init__(self):
        self.tasks = {}    # key -> Thread running (or having run) the task
        self.results = {}  # key -> return value of a successfully finished task
        self.errors = {}   # key -> exception raised by a failed task
        self._lock = threading.Lock()

    def _task_wrapper(self, key, target_func, *args, **kwargs):
        """A wrapper to run the target function and store its result or exception."""
        logger.info(f"Background task '{key}' started.")
        try:
            result = target_func(*args, **kwargs)
            with self._lock:
                self.results[key] = result
            logger.info(f"✅ Background task '{key}' finished successfully.")
        except Exception as e:
            with self._lock:
                self.errors[key] = e
            logger.critical(f"❌ Background task '{key}' failed with an exception.", exc_info=True)

    def start_task(self, key, target_func, *args, **kwargs):
        """Starts a new background task in a daemon thread.

        Duplicate keys are ignored with a warning. The membership check and
        registration happen under the lock so two concurrent callers cannot
        both start a task for the same key (the original checked unlocked).
        """
        with self._lock:
            if key in self.tasks:
                logger.warning(f"Task '{key}' is already running.")
                return
            thread = threading.Thread(
                target=self._task_wrapper,
                args=(key, target_func) + args,
                kwargs=kwargs,
                daemon=True  # Daemon threads exit when the main app exits
            )
            self.tasks[key] = thread
        # Start outside the lock; the wrapper itself acquires the lock.
        thread.start()

    def is_task_running(self, key):
        """Returns True if a specific task is still active.

        Always returns a bool (the original leaked None / the Thread object
        via short-circuit evaluation).
        """
        with self._lock:
            thread = self.tasks.get(key)
            return bool(thread and thread.is_alive())

    def is_task_done(self, key):
        """Checks if a task has completed (successfully or with an error)."""
        with self._lock:
            return key in self.results or key in self.errors

    def get_error(self, key):
        """Returns the exception for a failed task, if any."""
        with self._lock:
            return self.errors.get(key)
backend/cache_manager.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2025 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import logging
16
+ from dataclasses import asdict
17
+ from pathlib import Path
18
+
19
+ import diskcache as dc
20
+
21
+ from models import ClinicalMCQ, CaseSummary
22
+
23
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
class CacheManager:
    """
    Manages a persistent, on-disk cache for the demo using diskcache.
    This class is thread-safe and process-safe (diskcache handles locking).
    """

    def __init__(self, cache_directory: str | Path):
        # Keep the raw directory for logging/debugging; diskcache wants a str.
        self.cache_directory = cache_directory
        self.cache = dc.Cache(str(cache_directory))
        logger.info(f"✅ DemoCacheManager initialized. Cache directory: {cache_directory}")

    def get_all_mcqs_sequence(self, case_id: str) -> list[ClinicalMCQ]:
        """Retrieves the list of MCQs for a case.

        Returns an empty list on a cache miss. (The original annotation
        advertised `| None`, but None was never actually returned.)
        """
        mcq_list = self.cache.get(f"{case_id}_full_mcqs")
        if mcq_list is not None:
            # Entries are stored as plain dicts; rehydrate into dataclasses.
            return [ClinicalMCQ(**data) for data in mcq_list]
        return []

    def add_all_mcqs_to_case(self, case_id: str, all_mcqs: list[ClinicalMCQ]):
        """Set the list of MCQs to the given case in the cache."""
        with self.cache.transact():
            # Serialize dataclasses to dicts so the cache stores plain data.
            list_of_mcqs = [asdict(mcq) for mcq in all_mcqs]
            self.cache.set(f"{case_id}_full_mcqs", list_of_mcqs)
        logger.info(f"✅ Cache updated for case '{case_id}' with all MCQs.")

    def get_summary_template(self, case_id: str) -> CaseSummary | None:
        """Retrieves the summary template for a case.

        Returns None on a cache miss or when the cached payload cannot be
        deserialized back into a CaseSummary.
        """
        template_dict = self.cache.get(f"{case_id}_summary_template")
        if template_dict:
            try:
                # The rationale will be empty in the template
                return CaseSummary.from_dict(template_dict)
            except (TypeError, KeyError):
                logger.error("Deserialization of the cached summary template failed.")
                return None
        return None

    def save_summary_template(self, case_id: str, template: CaseSummary):
        """Saves a summary template to the cache."""
        self.cache.set(f"{case_id}_summary_template", asdict(template))
        logger.info(f"✅ Summary template saved for case '{case_id}'.")
backend/case_util.py ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2025 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import csv
16
+ import json
17
+ import logging
18
+ import random
19
+ import re
20
+ from dataclasses import replace
21
+ from pathlib import Path
22
+
23
+ from config import BASE_DIR, RANDOMIZE_CHOICES
24
+ from models import Case, CaseSummary, AnswerLog, ConversationTurn, QuestionOutcome, ClinicalMCQ
25
+
26
+ # --- Configuration ---
27
+ # Configure basic logging (optional, adjust as needed)
28
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
29
+ logger = logging.getLogger(__name__)
30
+
31
+
32
def fetch_report(report_path: Path):
    """Report file reading utility function.

    Loads the JSON ground-truth labels stored at `report_path`. Returns the
    parsed object on success, or "" (falsy) when the file is missing or does
    not contain valid JSON — callers test the result with `if not labels`.
    """
    logger = logging.getLogger(__name__)
    try:
        with open(report_path, 'r') as f:
            report = json.load(f)
        logger.info(f"Successfully loaded '{report_path}' into memory.")
        return report
    except FileNotFoundError:
        logger.error(f"ERROR: Could not find report file: {report_path}")
        return ""
    except json.JSONDecodeError as e:
        # A corrupt report should degrade the same way as a missing one
        # instead of crashing manifest loading (the original let this escape).
        logger.error(f"ERROR: Invalid JSON in report file {report_path}: {e}")
        return ""
42
+
43
+
44
def get_available_reports(reports_csv_path: Path):
    """Reads available reports as Cases for this demo.

    Parses the manifest CSV and returns a dict mapping case_id (str) -> Case.
    Rows with a missing report file or missing download URL are skipped with
    a warning; a missing/ill-formed manifest yields an empty dict.
    """
    available_reports: dict[str, Case] = {}
    if not reports_csv_path.is_file():
        logger.warning(f"Manifest CSV file not found at {reports_csv_path}. AVAILABLE_REPORTS will be empty.")
        return available_reports

    try:
        with open(reports_csv_path, mode='r', encoding='utf-8') as csvfile:
            reader = csv.DictReader(csvfile)
            required_headers = {'case_id', 'case_condition_name', 'report_path', 'download_image_url', 'findings'}
            # fieldnames is None for an empty file; treat that as "all headers missing"
            # instead of raising TypeError from issubset(None).
            fieldnames = set(reader.fieldnames or [])
            if not required_headers.issubset(fieldnames):
                logger.error(
                    f"CSV file {reports_csv_path} is missing one or more required headers: {required_headers - fieldnames}"
                )
                return available_reports

            for row in reader:
                case_id = row['case_id']
                condition_name = row['case_condition_name']
                report_path_from_csv = row['report_path']  # e.g., static/reports/report1.txt or empty
                download_image_url_from_csv = row['download_image_url']
                potential_findings = row['findings']

                # Paths in the CSV are relative to BASE_DIR; resolve before any I/O.
                abs_report_path = BASE_DIR / report_path_from_csv
                if not abs_report_path.is_file():
                    # BUG FIX: the original message said "Image file", but this checks the report file.
                    logger.warning(
                        f"Report file not found for case '{case_id}' at '{abs_report_path}'. Skipping this entry.")
                    continue

                if not download_image_url_from_csv:
                    logger.warning(
                        f"Download image url not found for case '{case_id}'. Skipping this entry.")
                    continue

                # BUG FIX: read via the resolved absolute path. The original passed the
                # CSV-relative path, which breaks whenever CWD != BASE_DIR even though
                # the existence check above used the absolute path.
                ground_truth_labels = fetch_report(abs_report_path)
                available_reports[str(case_id)] = Case(
                    id=case_id,
                    condition_name=condition_name,
                    ground_truth_labels=ground_truth_labels,
                    download_image_url=download_image_url_from_csv,
                    potential_findings=potential_findings,
                )
            logger.info(f"Loaded {len(available_reports)} report/image pairs from CSV.")

    except Exception as e:
        logger.error(f"Error reading or processing CSV file {reports_csv_path}: {e}", exc_info=True)
    return available_reports
92
+
93
+
94
def get_json_from_model_response(response_text: str) -> dict:
    """
    Robustly parses a JSON object from a response that may contain it
    within a markdown code block.

    Raises:
        ValueError: if no JSON object can be found or parsed. (ValueError is
        a subclass of Exception, so existing `except Exception` callers are
        unaffected; the original raised bare Exception.)
    """
    logger = logging.getLogger(__name__)

    # Prefer an explicit ```json fenced block, capturing {...} non-greedily.
    json_match = re.search(r"```json\s*(\{.*?\})\s*```", response_text, re.DOTALL)
    if json_match:
        json_str = json_match.group(1)
        try:
            return json.loads(json_str)
        except json.JSONDecodeError as e:
            logger.error(f"Failed to decode JSON after extraction: {e}")
            raise ValueError(f"Could not parse JSON from extracted block: {json_str}") from e

    # Fallback if the model misses the markdown block
    logger.warning("Could not find a ```json block. Falling back to raw search.")
    json_match_fallback = re.search(r"(\{.*\})", response_text, re.DOTALL)
    if json_match_fallback:
        try:
            return json.loads(json_match_fallback.group(1))
        except json.JSONDecodeError as e:
            # BUG FIX: the original let the raw JSONDecodeError escape here,
            # inconsistent with every other failure path of this function.
            raise ValueError(
                f"Could not find or parse JSON object in the API response: {response_text}") from e

    raise ValueError(f"Could not find or parse JSON object in the API response: {response_text}")
116
+
117
+
118
def get_potential_findings(case: Case) -> str:
    """Return the curated potential-findings text attached to a case."""
    return case.potential_findings
121
+
122
+
123
def build_summary_template(case: Case, rag_cache: dict) -> CaseSummary:
    """Builds summary template with static data like potential_findings, guideline_resources and condition."""
    # Pull pre-fetched RAG citations for this case; default to none.
    citations = rag_cache.get(case.id, {}).get("citations", [])
    citation_string = ', '.join(map(str, citations)) if citations else ""

    # Interpretation and rationale stay empty in the template; they are
    # filled in later from the user's journey.
    return CaseSummary(
        med_gemma_interpretation="",
        potential_findings=get_potential_findings(case),
        rationale=[],
        guideline_specific_resource=citation_string,
        condition=case.condition_name
    )
138
+
139
+
140
def populate_rationale(summary_template: CaseSummary, conversation_history: list[ConversationTurn]) -> CaseSummary:
    """Populates rationale and interpretation depending on user journey."""
    rationale_logs = []
    correct_count = 0

    for turn in conversation_history:
        mcq = turn.clinicalMcq
        answer_key = mcq.answer
        attempt1 = turn.userResponse.attempt1
        attempt2 = turn.userResponse.attempt2
        correct_answer_text = mcq.choices.get(answer_key, f"N/A - Model Answer Key '{answer_key}' not found.")

        # A turn counts as correct when either attempt matched the model's answer.
        if answer_key in (attempt1, attempt2):
            correct_count += 1
            outcome = QuestionOutcome(type="Correct", text=correct_answer_text)
        else:
            # Log whichever attempt the user actually ended on.
            chosen_key = attempt2 if attempt2 else attempt1
            outcome = QuestionOutcome(type="Incorrect", text=mcq.choices[chosen_key])

        rationale_logs.append(AnswerLog(question=mcq.question, outcomes=[outcome]))

    total_questions = len(conversation_history)
    accuracy = (correct_count / total_questions) * 100 if total_questions > 0 else 0

    if accuracy == 100:
        interpretation = f"Wonderful job! You achieved a perfect score of {accuracy:.0f}%, correctly identifying all key findings on your first attempt."
    elif accuracy >= 50:
        interpretation = f"Good job. You scored {accuracy:.0f}%, showing a solid understanding of the key findings for this case."
    else:
        interpretation = f"This was a challenging case, and you scored {accuracy:.0f}%. More preparation is needed. Review the rationale below for details."

    return CaseSummary(
        med_gemma_interpretation=interpretation,
        potential_findings=summary_template.potential_findings,
        rationale=rationale_logs,
        guideline_specific_resource=summary_template.guideline_specific_resource,
        condition=summary_template.condition,
    )
181
+
182
+
183
def randomize_mcqs(original_mcqs: list[ClinicalMCQ]) -> list[ClinicalMCQ]:
    """
    Takes a list of clinical MCQs and randomizes their answer choices.
    If an error occurs while randomizing a question, it returns the original question
    in its place and continues.
    """
    if not RANDOMIZE_CHOICES:
        return original_mcqs

    shuffled_mcqs = []
    for mcq in original_mcqs:
        try:
            # Remember the text of the correct option before shuffling.
            correct_text = mcq.choices[mcq.answer]

            # Shuffle only the option texts; the sorted keys stay fixed.
            option_texts = list(mcq.choices.values())
            random.shuffle(option_texts)
            new_choices = dict(zip(sorted(mcq.choices.keys()), option_texts))

            # The new answer key is whichever key now carries the correct text.
            new_answer = next(k for k, v in new_choices.items() if v == correct_text)

            # replace() yields an updated copy without mutating the original MCQ.
            shuffled_mcqs.append(replace(mcq, choices=new_choices, answer=new_answer))
        except Exception as e:
            # On any failure (e.g. a bad answer key) keep the question as-is.
            logger.warning(f"Warning: Could not randomize question '{mcq.id}'. Returning original. Error: {e}")
            shuffled_mcqs.append(mcq)

    return shuffled_mcqs
backend/config.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2025 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
import os
from pathlib import Path

# --- Model backend selection ----------------------------------------------
# POSSIBLE VALUES are HUGGING_FACE, VERTEX_AI
MEDGEMMA_LOCATION = os.environ.get("MEDGEMMA_LOCATION")
MEDGEMMA_ENDPOINT_URL = os.environ.get("MEDGEMMA_ENDPOINT_URL", None)

# --- Credentials (read from the environment; None when unset) -------------
GCLOUD_SA_KEY = os.environ.get("GCLOUD_SA_KEY", None)
HF_TOKEN = os.environ.get("HF_TOKEN", None)

# --- Feature flags: string env vars interpreted as booleans ---------------
_TRUTHY_VALUES = ('true', '1', 't')
USE_CACHE = os.getenv('USE_CACHE', 'true').lower() in _TRUTHY_VALUES
RANDOMIZE_CHOICES = os.getenv('RANDOMIZE_CHOICES', 'true').lower() in _TRUTHY_VALUES

# --- Filesystem layout (all paths anchored at this module's directory) ----
BASE_DIR = Path(__file__).parent.resolve()
# path to the built React app's 'dist' folder
STATIC_DIR = BASE_DIR / 'frontend' / 'dist'
MANIFEST_CSV_PATH = BASE_DIR / 'data' / 'reports_manifest.csv'
GUIDELINE_PDF_PATH = BASE_DIR / 'data' / 'who_chestxray_guideline_9241546778_eng.pdf'

# --- Quiz settings --------------------------------------------------------
MAX_NUMBER_OF_MCQ_QUESTIONS = 5
backend/data/reports/1.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ {"right pleural effusion": "yes"}
backend/data/reports/2.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ {"normal": "yes"}
backend/data/reports_manifest.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ case_id,image_type,case_condition_name,download_image_url,report_path,findings
2
+ 1,CXR,Pleural Effusion,https://huggingface.co/spaces/google/rad-learn-companion-samples/resolve/main/images/1.png,data/reports/1.txt,The findings on this Chest X-Ray are suggestive of Right-sided Pleural Effusion. This is indicated by the blunting of the right costophrenic angle and the presence of fluid in the right pleural space.
3
+ 2,CXR,No Abnormalities,https://huggingface.co/spaces/google/rad-learn-companion-samples/resolve/main/images/4.png,data/reports/2.txt,"Based on the image, it appears to be a Normal Chest X-Ray. The lungs appear clear with no obvious signs of consolidation, nodules, or masses. The heart size seems normal and there are no apparent mediastinal abnormalities."
backend/data/who_chestxray_guideline_9241546778_eng.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2620eea1f60558737f9467ebf17c659a695979a5c152d00c61fb3e25e80b278
3
+ size 7193815
backend/default_cache/README.md ADDED
@@ -0,0 +1 @@
 
 
1
+ Cache backup is stored here.
backend/default_cache/rad-learn-cache.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c71f42ca7294c27968449bdad1822161cc38f7f8f334523231d123d42f17826
3
+ size 7809
backend/llm_client.py ADDED
@@ -0,0 +1,291 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2025 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import json
16
+ import logging
17
+ import uuid
18
+
19
+ import requests
20
+
21
+ from case_util import get_json_from_model_response
22
+ from models import ClinicalMCQ
23
+ from prompts import mcq_prompt_all_questions_with_rag
24
+ from abc import ABC, abstractmethod
25
+ from google.oauth2 import service_account
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
class LLMClient(ABC):
    """Abstract base for chat-completion backends that turn a case into MCQs.

    Concrete subclasses implement `_make_chat_completion_request`; the prompt
    construction and response-to-ClinicalMCQ conversion live here.
    """

    _api_key = None
    _endpoint_url = None

    def generate_all_questions(self, case_data: dict, guideline_context: str) -> list[ClinicalMCQ] | None:
        """
        Orchestrates the prompt creation and live LLM call to generate the list of all MCQs.
        Receives pre-fetched RAG context as a string.
        Returns None if the call or parsing fails.
        """
        messages = self._create_prompt_messages_for_all_questions(
            image_url=case_data.get('download_image_url'),
            ground_truth_labels=case_data.get('ground_truth_labels', {}),
            guideline_context=guideline_context,
        )

        try:
            response_dict = self._make_chat_completion_request(
                model="tgi",  # Or your configured model
                messages=messages,
                temperature=0,
                max_tokens=8192,
            )

            # The parsed payload is expected to hold a "questions" list.
            question_dicts = response_dict.get("questions", [])
            if not question_dicts:
                raise ValueError("LLM response did not contain a 'questions' key or the list was empty.")

            mcqs = []
            for item in question_dicts:
                # A question object without a "question" field is malformed.
                if "question" not in item:
                    logger.warning("Skipping malformed question object in response.")
                    continue
                mcqs.append(ClinicalMCQ(
                    id=str(uuid.uuid4()),
                    question=item.get('question', ''),
                    choices=item.get('choices', {}),
                    hint=item.get('hint', ''),
                    answer=item.get('answer', ''),
                    rationale=item.get('rationale', ''),
                ))
            return mcqs

        except Exception as e:
            # Any failure (network, parsing, validation) is reported as None.
            logger.error(f"Failed to generate and parse learning module: {e}")
            return None

    @abstractmethod
    def _make_chat_completion_request(
        self,
        model: str,
        messages: list,
        temperature: float,
        max_tokens: int,
        top_p: float | None = None,
        seed: int | None = None,
        stop: list[str] | str | None = None,
        frequency_penalty: float | None = None,
        presence_penalty: float | None = None
    ) -> dict | None:
        """Perform one chat-completion call and return the parsed JSON payload."""
        pass

    def _create_prompt_messages_for_all_questions(self, image_url: str, ground_truth_labels: dict, guideline_context: str):
        """
        Creates the list of messages for the LLM prompt.
        The system message carries the (RAG-aware) instructions; the user
        message carries the image plus the case-specific data.
        """
        system_message = {
            "role": "system",
            "content": [
                {"type": "text", "text": mcq_prompt_all_questions_with_rag},
            ]
        }

        case_specific_text = (
            f"<significant_clinical_conditions>\n{json.dumps(ground_truth_labels, indent=2)}\n</significant_clinical_conditions>\n\n"
            f"<guideline_context>\n{guideline_context}\n</guideline_context>"
        )

        user_message = {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": image_url}},
                {"type": "text", "text": case_specific_text}
            ]
        }

        messages = [system_message, user_message]
        logger.info("Messages being sent:-\n{}".format(json.dumps(messages, indent=2)))
        return messages
129
+
130
class HuggingFaceLLMClient(LLMClient):
    """LLM client for a Hugging Face TGI endpoint using the OpenAI-compatible
    `/v1/chat/completions` streaming API."""

    def __init__(self, _api_key, _endpoint_url):
        """Store the bearer token and endpoint URL.

        Raises:
            ValueError: if either value is missing/empty.
        """
        if not _api_key:
            raise ValueError("No API key provided.")
        if not _endpoint_url:
            raise ValueError("No endpoint URL provided.")

        self._api_key = _api_key
        self._endpoint_url = _endpoint_url

    def _make_chat_completion_request(
            self,
            model: str,
            messages: list,
            temperature: float,
            max_tokens: int,
            top_p: float | None = None,
            seed: int | None = None,
            stop: list[str] | str | None = None,
            frequency_penalty: float | None = None,
            presence_penalty: float | None = None
    ) -> dict | None:
        """Stream a chat completion and return the parsed JSON payload.

        Concatenates the SSE delta chunks into one string and delegates JSON
        extraction to `get_json_from_model_response`.
        """
        headers = {
            "Authorization": f"Bearer {self._api_key}",
            "Content-Type": "application/json",
        }
        payload = {
            "model": model,
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
            "stream": True,
        }
        if top_p is not None: payload["top_p"] = top_p
        if seed is not None: payload["seed"] = seed
        if stop is not None: payload["stop"] = stop
        if frequency_penalty is not None: payload["frequency_penalty"] = frequency_penalty
        if presence_penalty is not None: payload["presence_penalty"] = presence_penalty

        # Normalize the endpoint so it always targets /v1/chat/completions.
        temp_url = self._endpoint_url.rstrip('/')
        if temp_url.endswith("/v1/chat/completions"):
            full_url = temp_url
        elif temp_url.endswith("/v1"):
            full_url = temp_url + "/chat/completions"
        else:
            full_url = temp_url + "/v1/chat/completions"

        # FIX: the payload requests streaming, but the original call omitted
        # stream=True, so `requests` buffered the entire response body before
        # iter_lines() ever ran. stream=True lets the SSE loop below consume
        # the response incrementally.
        response = requests.post(full_url, headers=headers, json=payload,
                                 timeout=60, stream=True)

        logger.info(f"LLM call status code: {response.status_code}, response: {response.reason}")
        # FIX: fail loudly on HTTP errors instead of silently parsing an error
        # body into an empty explanation; the caller catches and logs it.
        response.raise_for_status()

        explanation_parts = []
        for line in response.iter_lines():
            if line:
                decoded_line = line.decode('utf-8')
                if decoded_line.startswith('data: '):
                    json_data_str = decoded_line[len('data: '):].strip()
                    if json_data_str == "[DONE]":
                        break
                    try:
                        chunk = json.loads(json_data_str)
                        if chunk.get("choices") and chunk["choices"][0].get(
                                "delta") and chunk["choices"][0]["delta"].get(
                                "content"):
                            explanation_parts.append(
                                chunk["choices"][0]["delta"]["content"])
                    except json.JSONDecodeError:
                        logger.warning(
                            f"Could not decode JSON from stream chunk: {json_data_str}")
                        # Depending on API, might need to handle partial JSON or other errors
                elif decoded_line.strip() == "[DONE]":  # Some APIs might send [DONE] without "data: "
                    break

        explanation = "".join(explanation_parts).strip()
        if not explanation:
            logger.warning("Empty explanation from API")
        return get_json_from_model_response(explanation)
208
+
209
class VertexAILLMClient(LLMClient):
    """LLM client that calls a Vertex AI endpoint, authenticating with a
    service-account JSON key supplied via configuration."""

    def __init__(self, _api_key, _endpoint_url):
        """Store the service-account key (JSON string) and endpoint URL.

        Raises:
            ValueError: if either value is missing/empty.
        """
        if not _api_key:
            raise ValueError("No API key provided.")
        if not _endpoint_url:
            raise ValueError("No endpoint URL provided.")

        self._api_key = _api_key
        self._endpoint_url = _endpoint_url

    def _make_chat_completion_request(
            self,
            model: str,
            messages: list,
            temperature: float,
            max_tokens: int,
            top_p: float | None = None,
            seed: int | None = None,
            stop: list[str] | str | None = None,
            frequency_penalty: float | None = None,
            presence_penalty: float | None = None
    ) -> dict | None:
        """Make a non-streaming chat-completion request against Vertex AI.

        Note: top_p/seed/stop/frequency_penalty/presence_penalty are accepted
        for interface compatibility but are not forwarded to the endpoint.
        """
        # 1. Get credentials directly from the secret
        creds = self._get_credentials_from_secret()
        logger.info("Successfully loaded credentials from secret.")

        # 2. Get a valid access token
        token = self._get_access_token(creds)
        logger.info("Successfully obtained access token.")

        # 3. Use the token to make an authenticated API call
        headers = {
            'Authorization': f'Bearer {token}',
            'Content-Type': 'application/json'
        }

        payload = {
            "model": model,
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
        }

        response = requests.post(self._endpoint_url, headers=headers, json=payload,
                                 timeout=60)

        logger.info(f"LLM call status code: {response.status_code}, status reason: {response.reason}")
        # FIX: surface HTTP errors explicitly instead of failing later with a
        # confusing KeyError/JSONDecodeError while parsing an error body.
        response.raise_for_status()

        response_dict = response.json()
        final_response = response_dict["choices"][0]["message"]["content"]
        return get_json_from_model_response(final_response)

    def _get_credentials_from_secret(self):
        """Loads Google Cloud credentials from an environment variable."""
        if not self._api_key:
            # FIX: was an f-string with no placeholders.
            raise ValueError(
                "Environment variable 'GCLOUD_SA_KEY' not found. Please set it in your Hugging Face Space secrets.")
        logger.info("Loading Google Cloud credentials...")
        # Parse the JSON string into a dictionary
        credentials_info = json.loads(self._api_key)

        logger.info("Google Cloud credentials loaded.")
        # Define the required scopes for the API you want to access
        scopes = ['https://www.googleapis.com/auth/cloud-platform']

        # Create credentials from the dictionary
        credentials = service_account.Credentials.from_service_account_info(
            credentials_info,
            scopes=scopes
        )

        return credentials

    def _get_access_token(self, credentials):
        """Refreshes the credentials to get a valid access token."""
        # Imported lazily so this optional dependency is only required when
        # the Vertex AI backend is actually used.
        from google.auth.transport.requests import Request

        # Refresh the token to ensure it's not expired
        credentials.refresh(Request())
        return credentials.token
backend/models.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2025 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from dataclasses import dataclass
16
+ from typing import Any
17
+
18
+
19
@dataclass
class ClinicalMCQ:
    """A single multiple-choice question generated for a teaching case."""
    id: str  # unique identifier (UUID string assigned at creation)
    question: str  # the question text shown to the learner
    choices: dict[str, str]  # option key (e.g. "A") -> choice text
    hint: str  # short hint shown on request
    answer: str  # key into `choices` identifying the correct option
    rationale: str  # explanation of why the answer is correct
27
+
28
+
29
@dataclass
class Case:
    """A radiology teaching case, as loaded from the reports manifest."""
    id: str  # case identifier (manifest `case_id`)
    condition_name: str  # human-readable condition label
    ground_truth_labels: dict[str, str]  # condition -> label, e.g. {"normal": "yes"}
    download_image_url: str  # URL of the case's X-ray image
    potential_findings: str  # free-text findings description
36
+
37
+
38
#### For Summary ####
@dataclass
class UserResponse:
    """Represents the user's attempts for a single question."""
    attempt1: str  # key of the first choice the user picked
    attempt2: str | None  # key of the second attempt, if one was made
44
+
45
+
46
@dataclass
class ConversationTurn:
    """Pairs one generated MCQ with the user's recorded attempts at it."""
    clinicalMcq: ClinicalMCQ
    userResponse: UserResponse

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "ConversationTurn":
        """
        A factory method to create a ConversationTurn instance from a dictionary.
        This handles the nested instantiation of the other dataclasses.
        Missing keys or wrong shapes raise KeyError/TypeError, which doubles
        as structural validation of the input.
        """
        mcq = ClinicalMCQ(**data['ModelResponse'])
        response = UserResponse(**data['UserResponse'])
        return cls(clinicalMcq=mcq, userResponse=response)
66
+
67
+
68
@dataclass
class QuestionOutcome:
    """Represents a single outcome line for a question."""
    type: str  # "Correct" or "Incorrect"
    text: str  # The actual answer text
73
+
74
+
75
@dataclass
class AnswerLog:
    """A log detailing the user's performance on a single question for the rationale,
    now including explicit correct and user's chosen (if incorrect) answers."""
    question: str
    outcomes: list[QuestionOutcome]  # one QuestionOutcome per outcome line

    @classmethod
    def from_dict(cls, data: dict) -> "AnswerLog":
        """Rebuild an AnswerLog (with nested QuestionOutcome objects) from its dict form."""
        return cls(
            question=data['question'],
            outcomes=[QuestionOutcome(**entry) for entry in data['outcomes']],
        )
87
+
88
+
89
@dataclass
class CaseSummary:
    """Represents the final, structured summary with the new fields."""
    med_gemma_interpretation: str  # model's overall reading of the case
    rationale: list[AnswerLog]  # per-question performance logs
    potential_findings: str
    guideline_specific_resource: str
    condition: str

    @classmethod
    def from_dict(cls, data: dict) -> "CaseSummary":
        """Rebuild a CaseSummary, delegating nested entries to AnswerLog.from_dict."""
        return cls(
            med_gemma_interpretation=data['med_gemma_interpretation'],
            rationale=[AnswerLog.from_dict(item) for item in data['rationale']],
            potential_findings=data['potential_findings'],
            guideline_specific_resource=data['guideline_specific_resource'],
            condition=data['condition'],
        )
backend/prompts.py ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2025 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ # --- PROMPT FOR WHEN RAG IS DISABLED ---
16
+
17
+ mcq_prompt_all_questions_with_rag = """
18
+ You are a distinguished medical professor and an expert in radiological interpretation. You are designing a learning experience for medical students. Your signature teaching style is based on the Socratic method: you guide students from basic visual evidence to a final conclusion without giving away the answer prematurely.
19
+
20
+ ### Your Pedagogical Mandate
21
+ Your entire goal is to teach a **process of visual analysis**, not just to test final knowledge. You will create a learning module that forces the student to build a case from the ground up.
22
+
23
+ 1. **Observation First, Interpretation Last:** This is the core of your method. The student must first learn to SEE. Your questions will guide their eyes to specific findings on the image.
24
+ 2. **Purposeful Rationales:** Your rationales must also follow this principle.
25
+ * For **observational questions (Q1-4)**, the `rationale` must explain the **radiological principle** of the finding (e.g., "the border is obscured due to loss of silhouette against an adjacent fluid-density opacity"), not the `<significant_clinical_conditions>` it represents.
26
+ * For the **final diagnostic question (Q5)**, the `rationale` can and should explain how the signs point to the specific pathology.
27
+ 3. Since Chest X-Ray alone is not enough for concluding diagnosis, instead of using the term "diagnosis" use terms like "finding", "clinical condition", "clinical abnormality", etc.
28
+
29
+ ### Primary Task
30
+ Your output will be a single, valid JSON object wrapped in a markdown code block (```json ... ```).
31
+
32
+ ---
33
+ ### INPUT STRUCTURE FORMAT
34
+ You will be provided with the following inputs wrapped in XML-like tags:
35
+
36
+ 1. **`<chest_x_ray_image>` (Image):** The uploaded Chest X-Ray image that the entire learning module must be based on. Remember, a frontal CXR image will show the right hemithorax on the left side of the image and the left hemithorax on the right side of the image.
37
+ 2. **`<significant_clinical_conditions>` (JSON Object):** Your secret "Answer Key" containing the definitive clinical findings. This is for your guidance ONLY.
38
+ 3. **`<guideline_context>` (Text Block):** Retrieved knowledge from a clinical guideline. This is to be used ONLY for generating the `rationale` and `hint`.
39
+
40
+ ---
41
+ ### OUTPUT JSON STRUCTURE DEFINITION
42
+ You MUST generate a JSON object with the following top-level keys: `reasoning_steps` and `questions`.
43
+
44
+ 1. **`reasoning_steps` (Object):** This is your internal lesson plan.
45
+ * `final_clinical_conditions` (String): The conditions from `<significant_clinical_conditions>`.
46
+ * `observation_pathway` (Array of Strings): An array of exactly 5 strings outlining the Socratic path, specific to the image and including laterality.
47
+
48
+ 2. **`questions` (Array of Objects):** An array of 5 question objects that execute your lesson plan.
49
+ * Each object must have the keys: `question`, `choices` (an object with A,B,C,D), `answer`, `rationale`, `hint`.
50
+
51
+ ---
52
+ ### CONTENT & LOGIC RULES
53
+ 1. **Instruction for observation pathways:**
54
+ * **Core Instruction:** An array of exactly 5 strings outlining the Socratic path, specific to the image.
55
+
56
+ * ** When no abnormalities are present, the pathway must confirm the normalcy of key anatomical structures in a logical order (e.g., assess technical quality, then cardiac silhouette, then lung fields, then costophrenic angles).
57
+ * **Be Firm on Laterality:** The `observation_pathway` and `questions` must be specific to the side (left/right) shown in the image, using the 'L' or 'R' marker in the image as a definitive cue.
58
+ * **Include helpful observations to reduce repetition:** You can also add observation pathways based on visual observations which could help rule out other common clinical conditions.
59
+ * **Avoid Absolute Measurements Observations:** Since the CXR is not to scale, do not generate observation pathways which requires absolute measurements. Example: Size in cm for the width of the mediastinum. Diameter of the heart in cm.
60
+
61
+
62
+ 2. **Question Generation via Mapping:**
63
+ * ** Core Instruction:** The 5 questions you generate MUST correspond directly and in order to the 5 steps in your `observation_pathway`.
64
+ * **Plausible Distractor Answer Choices:** For Q1-4, choice distractors MUST be other plausible but incorrect radiological signs. For Q5, distractors MUST be relevant differential diagnoses for the visual finding (e.g., other conditions that can look similar on the film).
65
+ * **No Information Leakage (Q1-4):** The diagnostic terms from `<final_clinical_conditions>` MUST NOT appear in the `question`, `choices`, `rationale`, or `hint` for the first four questions.
66
+ * **Guideline Usage:** Use the relevant parts of `<guideline_context>` ONLY to generate the `rationale` and `hint`, and not the question text itself. Do not include the `<final_clinical_conditions>` in the the rationale or the hint.
67
+ * **Conciseness:** The `rationale` and `hint` strings MUST NOT exceed 30 words.
68
+ * **Relevance to X-Ray Image:** The questions **must** be relevant to the X-Ray image provided.
69
+ * **5th Question Instructions:** Ask the student to **synthesize the different observations** made earlier and provide a list of options consisting of the expected clinical condition along with 3 other viable options. This should be done even if the X-Ray image is normal.
70
+ ---
71
+ ### COMPLETE EXAMPLE (Demonstrating All Rules)
72
+
73
+ **LIVE INPUT:**
74
+ <significant_clinical_conditions>
75
+ {"left middle lobe pneumonia": "yes"}
76
+ </significant_clinical_conditions>
77
+ <guideline_context>
78
+ Pneumonia is an inflammatory condition of the lung primarily affecting the small air sacs (alveoli). On a chest X-ray, look for areas of consolidation, which appear as ill-defined increased opacities (whiteness), sometimes with air bronchograms (dark, branching airways visible within the white consolidation).
79
+ </guideline_context>
80
+
81
+ **OUTPUT:**
82
+ ```json
83
+ {
84
+ "reasoning_steps": {
85
+ "final_clinical_conditions": "Left Middle Lobe Pneumonia",
86
+ "observation_pathway": [
87
+ "Assess the overall technical quality and patient positioning of the radiograph.",
88
+ "Identify areas of increased opacity (whiteness) within the lung fields.",
89
+ "Localize the increased opacity to a specific lobe, paying attention to the borders and effacement of normal structures.",
90
+ "Look for associated signs such as air bronchograms or volume loss.",
91
+ "Synthesize the evidence to determine the final findings."
92
+ ]
93
+ },
94
+ "questions": [
95
+ {
96
+ "question": "Which of the following best describes the technical quality of this radiograph?",
97
+ "choices": {
98
+ "A": "Significant patient rotation is present.",
99
+ "B": "Adequate inspiration and penetration",
100
+ "C": "The image is significantly under-penetrated.",
101
+ "D": "It is an AP supine view, not a PA upright view."
102
+ },
103
+ "answer": "B",
104
+ "rationale": "The film shows clear lung markings where present and adequate visibility of the thoracic spine, indicating proper exposure.",
105
+ "hint": "Assess if you can see the vertebrae behind the heart and count the posterior ribs visible above the diaphragm."
106
+ },
107
+ {
108
+ "question": "What change in opacity is noted in the left mid-lung zone?",
109
+ "choices": {
110
+ "A": "It is significantly more lucent (blacker).",
111
+ "B": "There is a discrete, well-circumscribed nodule.",
112
+ "C": "There is an ill-defined area of increased opacity.",
113
+ "D": "No significant change in opacity is visible."
114
+ },
115
+ "answer": "C",
116
+ "rationale": "Increased opacity suggests consolidation, which is a key finding in certain lung conditions.",
117
+ "hint": "Focus on the general whiteness or grayness of the lung parenchyma compared to normal lung."
118
+ },
119
+ {
120
+ "question": "Which of the following describes the appearance of the left heart border?",
121
+ "choices": {
122
+ "A": "It is sharply demarcated.",
123
+ "B": "It is completely obscured or silhouetted.",
124
+ "C": "It is displaced laterally.",
125
+ "D": "It is less prominent than usual."
126
+ },
127
+ "answer": "B",
128
+ "rationale": "Loss of definition of a normal anatomical border (silhouette sign) suggests an abnormality in the adjacent lung segment.",
129
+ "hint": "Observe if the outline of the left side of the heart is clearly visible or if it blends into the surrounding opacity."
130
+ },
131
+ {
132
+ "question": "Are there any visible air bronchograms within the area of increased opacity?",
133
+ "choices": {
134
+ "A": "Yes, lucent branching structures are seen within the opacity.",
135
+ "B": "No, the opacity is uniformly dense.",
136
+ "C": "Only fluid levels are visible.",
137
+ "D": "The opacity is too faint to assess for air bronchograms."
138
+ },
139
+ "answer": "A",
140
+ "rationale": "Air bronchograms indicate that the airspaces are filled with fluid or exudate, but the bronchi remain patent, a classic sign of consolidation.",
141
+ "hint": "Look for dark, branching, tubular structures against the background of the white consolidation."
142
+ },
143
+ {
144
+ "question": "Synthesizing the observations of increased opacity in the left mid-lung zone, obscuration of the left heart border, and presence of air bronchograms, what is the most likely finding?",
145
+ "choices": {
146
+ "A": "Left-sided pleural effusion",
147
+ "B": "Left Middle Lobe Pneumonia",
148
+ "C": "Left upper lobe collapse",
149
+ "D": "Left lower lobe atelectasis"
150
+ },
151
+ "answer": "B",
152
+ "rationale": "The combination of consolidation in the left mid-lung zone, silhouetting of the left heart border (due to involvement of the left middle lobe), and air bronchograms is highly characteristic of pneumonia affecting the left middle lobe.",
153
+ "hint": "The 'silhouette sign' is crucial for localizing the pathology."
154
+ }
155
+ ]
156
+ }
157
+ ```
158
+
159
+ ---
160
+ ### LIVE TASK
161
+ Now, apply your expert Socratic teaching method. Generate a single JSON object for the following live inputs, strictly adhering to all structure, content, and logic rules defined above.
162
+
163
+ **LIVE INPUT:**
164
+ <chest_x_ray_image>
165
+ """
backend/rag/__init__.py ADDED
File without changes
backend/rag/knowledge_base.py ADDED
@@ -0,0 +1,568 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2025 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import concurrent.futures
16
+ import logging
17
+ import os
18
+ import re
19
+ from pathlib import Path
20
+ from typing import Dict, List
21
+
22
+ import fitz # PyMuPDF
23
+ from PIL import Image
24
+ from langchain.docstore.document import Document as LangchainDocument
25
+ from langchain.retrievers import BM25Retriever, EnsembleRetriever
26
+ from langchain.text_splitter import NLTKTextSplitter
27
+ from langchain_community.vectorstores import Chroma
28
+ from tqdm import tqdm
29
+
30
+ logger = logging.getLogger(__name__)
31
+
32
+ IMAGE_SUMMARY_PROMPT = """Summarize key findings in this image."""
33
+
34
+
35
+ class KnowledgeBase:
36
+ """Processes a source PDF and builds a self-contained, searchable RAG knowledge base."""
37
+
38
+ def __init__(self, models: dict, config_overrides: dict | None = None):
39
+ """Initializes the builder with necessary models and configuration."""
40
+ self.embedder = models.get("embedder")
41
+ self.ner_pipeline = models.get("ner_pipeline")
42
+
43
+ # Set default config and apply any overrides
44
+ self.config = self._get_default_config()
45
+ if config_overrides:
46
+ self.config.update(config_overrides)
47
+
48
+ # For consistent chunking, the RAG query uses the same enriching and chunking logic as the knowledge base.
49
+ self.document_enricher = self._enrich_documents
50
+ self.chunker = self._create_chunks_from_documents
51
+ self.retriever: EnsembleRetriever | None = None
52
+ self.page_map: Dict[int, Dict] = {}
53
+ self.source_filepath = ""
54
+
55
+ # Create necessary directories from config
56
+ Path(self.config["IMAGE_DIR"]).mkdir(parents=True, exist_ok=True)
57
+ Path(self.config["CHROMA_PERSIST_DIR"]).mkdir(parents=True, exist_ok=True)
58
+
59
+ def _get_default_config(self):
60
+ """Returns the default configuration for the KnowledgeBase."""
61
+ return {
62
+ "IMAGE_DIR": Path("processed_figures_kb/"),
63
+ "CHROMA_PERSIST_DIR": Path("chroma_db_store/"),
64
+ "MEDICAL_ENTITY_TYPES_TO_EXTRACT": ["PROBLEM"],
65
+ "EXTRACT_IMAGE_SUMMARIES": False, # Disabled as we don't load the LLM here
66
+ "FILTER_FIRST_PAGES": 6,
67
+ "FIGURE_MIN_WIDTH": 30,
68
+ "FIGURE_MIN_HEIGHT": 30,
69
+ "SENTENCE_CHUNK_SIZE": 250,
70
+ "CHUNK_FILTER_SIZE": 20,
71
+ "RETRIEVER_TOP_K": 20,
72
+ "ENSEMBLE_WEIGHTS_BM25,SENTENCE,NER": [0.2, 0.3, 0.5],
73
+ "SENTENCE_SCORE_THRESHOLD": 0.6,
74
+ "NER_SCORE_THRESHOLD": 0.5,
75
+ "MAX_PARALLEL_WORKERS": 16,
76
+ }
77
+
78
+ def build(self, pdf_filepath: str):
79
+ """The main public method to build the knowledge base from a PDF."""
80
+ logger.info(f"--------- Building Knowledge Base from '{pdf_filepath}' ---------")
81
+ pdf_path = Path(pdf_filepath)
82
+ if not pdf_path.exists():
83
+ logger.error(f"ERROR: PDF file not found at {pdf_filepath}")
84
+ return None
85
+
86
+ self.source_filepath = pdf_path
87
+
88
+ # Step 1: Process the PDF and build the structured page_map.
89
+ self.page_map = self._process_and_structure_pdf(pdf_path)
90
+ all_docs = [
91
+ doc for page_data in self.page_map.values() for doc in page_data["blocks"]
92
+ ]
93
+
94
+ # Step 2: Enrich documents with NER metadata.
95
+ enriched_docs = self._enrich_documents(all_docs, self.config.get("EXTRACT_IMAGE_SUMMARIES", False))
96
+
97
+ # Step 3: Chunk the enriched documents into final searchable units.
98
+ final_chunks = self._create_chunks_from_documents(enriched_docs)
99
+
100
+ # Step 4: Build the final ensemble retriever.
101
+ self.retriever = self._build_ensemble_retriever(final_chunks)
102
+
103
+ if self.retriever:
104
+ logger.info(f"--------- Knowledge Base Built Successfully ---------")
105
+ else:
106
+ logger.error(f"--------- Knowledge Base Building Failed ---------")
107
+
108
+ return self
109
+
110
    # --- Step 1: PDF Content Extraction ---
    def _process_and_structure_pdf(self, pdf_path: Path) -> dict:
        """Processes a PDF in parallel and directly builds the final page_map.

        This version is more efficient by opening the PDF only once.

        Returns a dict mapping 1-based page number -> {"chapter_id": int,
        "blocks": list[LangchainDocument]}, sorted by page number, or an
        empty dict on any failure.
        """
        logger.info("Step 1: Processing PDF and building structured page map...")
        page_map = {}

        try:
            # Improvement: Open the PDF ONCE to get all preliminary info.
            # The raw bytes are captured so each worker thread can reopen its
            # own independent fitz handle (fitz documents are not thread-safe).
            with fitz.open(pdf_path) as doc:
                pdf_bytes_buffer = doc.write()
                page_count = len(doc)
                toc = doc.get_toc()

            # Improvement: Create a more robust chapter lookup map.
            # Each 1-based page number maps to the index of its level-1 TOC
            # entry; the last chapter runs through the end of the document.
            page_to_chapter_id = {}
            if toc:
                chapters = [item for item in toc if item[0] == 1]
                for i, (lvl, title, start_page) in enumerate(chapters):
                    end_page = (
                        chapters[i + 1][2] - 1 if i + 1 < len(chapters) else page_count
                    )
                    for page_num in range(start_page, end_page + 1):
                        page_to_chapter_id[page_num] = i

            # Create tasks for the thread pool (using a tuple as requested).
            # Front-matter pages (the first FILTER_FIRST_PAGES) are skipped.
            tasks = [
                (
                    pdf_bytes_buffer,
                    i,
                    self.config,
                    pdf_path.name,
                    page_to_chapter_id,
                )
                for i in range(self.config["FILTER_FIRST_PAGES"], page_count)
            ]

            # Parallel Processing: cap workers at both the configured maximum
            # and the machine's CPU count.
            num_workers = min(
                self.config["MAX_PARALLEL_WORKERS"], os.cpu_count() or 1
            )
            with concurrent.futures.ThreadPoolExecutor(
                max_workers=num_workers
            ) as executor:
                futures = [
                    executor.submit(self.process_single_page, task) for task in tasks
                ]
                progress_bar = tqdm(
                    concurrent.futures.as_completed(futures),
                    total=len(tasks),
                    desc="Processing & Structuring Pages",
                )
                for future in progress_bar:
                    result = future.result()
                    if result:
                        # The worker returns a fully formed dictionary for the page_map
                        page_map[result["page_num"]] = result["content"]

        except Exception as e:
            logger.error(f"❌ Failed to process PDF {pdf_path.name}: {e}")
            return {}

        logger.info(f"✅ PDF processed. Created a map of {len(page_map)} pages.")
        # Completed pages arrive out of order; return them sorted by page number.
        return dict(sorted(page_map.items()))
176
+
177
+ # --- Step 2: Document Enrichment ---
178
+ def _enrich_documents(
179
+ self, docs: List[LangchainDocument], summarize: bool = False
180
+ ) -> List[LangchainDocument]:
181
+ """Enriches a list of documents with NER metadata and image summaries."""
182
+ logger.info("\nStep 2: Enriching documents...")
183
+ # NER Enrichment
184
+ if self.ner_pipeline:
185
+ logger.info("Adding NER metadata...")
186
+ for doc in tqdm(docs, desc="Enriching with NER"):
187
+ # 1. Skip documents that have no actual text content
188
+ if not doc.page_content or not doc.page_content.strip():
189
+ continue
190
+
191
+ try:
192
+ # 2. Process ONLY the text of the current document
193
+ processed_doc = self.ner_pipeline(doc.page_content)
194
+
195
+ # 3. Extract entities from the result. This result now
196
+ # unambiguously belongs to the current 'doc'.
197
+ entities = [
198
+ ent.text
199
+ for ent in processed_doc.ents
200
+ if ent.type in self.config["MEDICAL_ENTITY_TYPES_TO_EXTRACT"]
201
+ ]
202
+
203
+ # 4. Assign the correctly mapped entities to the document's metadata
204
+ if entities:
205
+ # Using set() handles duplicates before sorting and joining
206
+ unique_entities = sorted(list(set(entities)))
207
+ doc.metadata["block_ner_entities"] = ", ".join(unique_entities)
208
+
209
+ except Exception as e:
210
+ # Add error handling for robustness in case a single block fails
211
+ logger.warning(
212
+ f"\nWarning: Could not process NER for a block on page {doc.metadata.get('page_number', 'N/A')}: {e}")
213
+
214
+ # Image Summary Enrichment
215
+ if summarize:
216
+ logger.info("Generating image summaries...")
217
+ docs_with_figures = [
218
+ doc for doc in docs if "linked_figure_path" in doc.metadata
219
+ ]
220
+ for doc in tqdm(docs_with_figures, desc="Summarizing Images"):
221
+ try:
222
+ img = Image.open(doc.metadata["linked_figure_path"]).convert("RGB")
223
+ summary = self._summarize_image(img)
224
+ if summary:
225
+ doc.metadata["image_summary"] = summary
226
+ except Exception as e:
227
+ logger.warning(
228
+ "Warning: Could not summarize image"
229
+ f" {doc.metadata.get('linked_figure_path', '')}: {e}"
230
+ )
231
+ return docs
232
+
233
+ def _summarize_image(self, pil_image: Image.Image) -> str:
234
+ """Helper method to call the LLM for image summarization."""
235
+ if not self.llm_pipeline:
236
+ return ""
237
+ messages = [{
238
+ "role": "user",
239
+ "content": [
240
+ {"type": "text", "text": IMAGE_SUMMARY_PROMPT},
241
+ {"type": "image", "image": pil_image},
242
+ ],
243
+ }]
244
+ try:
245
+ output = self.llm_pipeline(text=messages, max_new_tokens=150)
246
+ return output[0]["generated_text"][-1]["content"].strip()
247
+ except Exception:
248
+ return ""
249
+
250
+ # --- Step 3: Document Chunking ---
251
+ def _create_chunks_from_documents(
252
+ self, enriched_docs: List[LangchainDocument], display_results: bool = True
253
+ ) -> List[LangchainDocument]:
254
+ """Takes enriched documents and creates the final list of chunks for indexing.
255
+
256
+ This method now has a single responsibility: chunking.
257
+ """
258
+ if display_results:
259
+ logger.info("\nStep 3: Creating final chunks...")
260
+
261
+ # Sentence Splitting
262
+ if display_results:
263
+ logger.info("Applying NLTK Sentence Splitting...")
264
+ splitter = NLTKTextSplitter(chunk_size=self.config["SENTENCE_CHUNK_SIZE"])
265
+ sentence_chunks = splitter.split_documents(enriched_docs)
266
+ if display_results:
267
+ logger.info(f"Generated {len(sentence_chunks)} sentence-level chunks.")
268
+
269
+ # NER Entity Chunking (based on previously enriched metadata)
270
+ if display_results:
271
+ logger.info("Creating NER Entity Chunks...")
272
+ ner_entity_chunks = [
273
+ LangchainDocument(
274
+ page_content=entity,
275
+ metadata={**doc.metadata, "chunk_type": "ner_entity_standalone"},
276
+ )
277
+ for doc in enriched_docs
278
+ if (entities_str := doc.metadata.get("block_ner_entities"))
279
+ for entity in entities_str.split(", ")
280
+ if entity
281
+ ]
282
+ if display_results:
283
+ logger.info(f"Added {len(ner_entity_chunks)} NER entity chunks.")
284
+
285
+ all_chunks = sentence_chunks + ner_entity_chunks
286
+ return [chunk for chunk in all_chunks if chunk.page_content]
287
+
288
+ # --- Step 4: Retriever Building ---
289
+ def _build_ensemble_retriever(
290
+ self, chunks: List[LangchainDocument]
291
+ ) -> EnsembleRetriever | None:
292
+ """Builds the final ensemble retriever from the chunks.
293
+
294
+ This method was already well-focused.
295
+ """
296
+ if not chunks:
297
+ logger.error("No chunks to build retriever from.")
298
+ return None
299
+ logger.info("\nStep 4: Building specialized retrievers...")
300
+ sentence_chunks = [
301
+ doc
302
+ for doc in chunks
303
+ if doc.metadata.get("chunk_type") != "ner_entity_standalone"
304
+ ]
305
+ ner_chunks = [
306
+ doc
307
+ for doc in chunks
308
+ if doc.metadata.get("chunk_type") == "ner_entity_standalone"
309
+ ]
310
+ retrievers, weights = [], []
311
+
312
+ if sentence_chunks:
313
+ bm25_retriever = BM25Retriever.from_documents(sentence_chunks)
314
+ bm25_retriever.k = self.config["RETRIEVER_TOP_K"]
315
+ retrievers.append(bm25_retriever)
316
+ weights.append(self.config["ENSEMBLE_WEIGHTS_BM25,SENTENCE,NER"][0])
317
+ sentence_vs = Chroma.from_documents(
318
+ documents=sentence_chunks,
319
+ embedding=self.embedder,
320
+ persist_directory=str(
321
+ self.config["CHROMA_PERSIST_DIR"] / "sentences"
322
+ ),
323
+ )
324
+ vector_retriever = sentence_vs.as_retriever(
325
+ search_type="similarity_score_threshold",
326
+ search_kwargs={
327
+ "k": self.config["RETRIEVER_TOP_K"],
328
+ "score_threshold": self.config["SENTENCE_SCORE_THRESHOLD"],
329
+ },
330
+ )
331
+ retrievers.append(vector_retriever)
332
+ weights.append(self.config["ENSEMBLE_WEIGHTS_BM25,SENTENCE,NER"][1])
333
+
334
+ if ner_chunks:
335
+ ner_vs = Chroma.from_documents(
336
+ documents=ner_chunks,
337
+ embedding=self.embedder,
338
+ persist_directory=str(self.config["CHROMA_PERSIST_DIR"] / "entities"),
339
+ )
340
+ ner_retriever = ner_vs.as_retriever(
341
+ search_type="similarity_score_threshold",
342
+ search_kwargs={
343
+ "k": self.config["RETRIEVER_TOP_K"],
344
+ "score_threshold": self.config["NER_SCORE_THRESHOLD"],
345
+ },
346
+ )
347
+ retrievers.append(ner_retriever)
348
+ weights.append(self.config["ENSEMBLE_WEIGHTS_BM25,SENTENCE,NER"][2])
349
+
350
+ if not retrievers:
351
+ logger.error("⚠️ Could not create any retrievers.")
352
+ return None
353
+ logger.info(f"Creating final ensemble with weights: {weights}")
354
+ return EnsembleRetriever(retrievers=retrievers, weights=weights)
355
+
356
    @staticmethod
    def process_single_page(args_tuple: tuple) -> dict | None:
        """Worker function for parallel PDF processing.

        Processes one page and returns a structured dictionary for that page:
        {"page_num": int, "content": {"chapter_id": int, "blocks": [...]}},
        or None when the page yields no documents or processing fails.
        """
        # Unpack arguments (still using a tuple as requested)
        pdf_bytes_buffer, page_num_idx, config, pdf_filename, page_to_chapter_id = (
            args_tuple
        )

        lc_documents = []
        # fitz pages are 0-indexed; the page_map / metadata use 1-based numbers.
        page_num = page_num_idx + 1

        try:
            # Each thread reopens the document from bytes — fitz handles are
            # not safe to share across threads.
            with fitz.open(stream=pdf_bytes_buffer, filetype="pdf") as doc:
                page = doc[page_num_idx]
                # 1. Extract raw, potentially fragmented text blocks
                raw_text_blocks = page.get_text("blocks", sort=True)

                # 2. Immediately merge fragmented blocks into paragraphs
                paragraph_blocks = KnowledgeBase._merge_text_blocks(raw_text_blocks)

                # 3. Render vector drawings above the minimum size as figures
                page_figures = []
                for fig_j, path_dict in enumerate(page.get_drawings()):
                    bbox = path_dict["rect"]
                    if (
                        bbox.is_empty
                        or bbox.width < config["FIGURE_MIN_WIDTH"]
                        or bbox.height < config["FIGURE_MIN_HEIGHT"]
                    ):
                        continue

                    # Pad the bounding box slightly, clamped to the page bounds
                    padded_bbox = bbox + (-2, -2, 2, 2)
                    padded_bbox.intersect(page.rect)
                    if padded_bbox.is_empty:
                        continue

                    pix = page.get_pixmap(clip=padded_bbox, dpi=150)
                    if pix.width > 0 and pix.height > 0:
                        img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                        img_path = (
                            config["IMAGE_DIR"]
                            / f"{Path(pdf_filename).stem}_p{page_num}_fig{fig_j + 1}.png"
                        )
                        img.save(img_path)
                        page_figures.append({
                            "bbox": bbox,
                            "path": str(img_path),
                            "id": f"Figure {fig_j + 1} on {pdf_filename}, page {page_num}",
                        })

                # 4. Normalize the clean PARAGRAPH blocks, dropping empty text
                text_blocks_on_page = [
                    {
                        "bbox": fitz.Rect(x0, y0, x1, y1),
                        "text": text.strip(),
                        "original_idx": b_idx,
                    }
                    for b_idx, (x0, y0, x1, y1, text, _, _) in enumerate(
                        paragraph_blocks
                    )
                    if text.strip()
                ]

                # 5. Link "Figure N ..." captions to figures and emit documents
                potential_captions = [
                    b
                    for b in text_blocks_on_page
                    if re.match(r"^\s*Figure\s*\d+", b["text"], re.I)
                ]
                mapped_caption_indices = set()
                for fig_data in page_figures:
                    cap_text, cap_idx = KnowledgeBase.find_best_caption_for_figure(
                        fig_data["bbox"], potential_captions
                    )
                    # Each caption may be claimed by at most one figure
                    if cap_text and cap_idx not in mapped_caption_indices:
                        mapped_caption_indices.add(cap_idx)
                        metadata = {
                            "source_pdf": pdf_filename,
                            "page_number": page_num,
                            "chunk_type": "figure-caption",
                            "linked_figure_path": fig_data["path"],
                            "linked_figure_id": fig_data["id"],
                            "block_id": f"{page_num}_{cap_idx}",
                            "original_block_text": cap_text,
                        }
                        lc_documents.append(
                            LangchainDocument(page_content=cap_text, metadata=metadata)
                        )

                # Remaining non-caption blocks become plain text documents,
                # unless they look like short header/footer noise
                for block_data in text_blocks_on_page:
                    if block_data["original_idx"] in mapped_caption_indices:
                        continue
                    if KnowledgeBase.should_filter_text_block(
                        block_data["text"],
                        block_data["bbox"],
                        page.rect.height,
                        config["CHUNK_FILTER_SIZE"],
                    ):
                        continue
                    metadata = {
                        "source_pdf": pdf_filename,
                        "page_number": page_num,
                        "chunk_type": "text_block",
                        "block_id": f"{page_num}_{block_data['original_idx']}",
                        "original_block_text": block_data["text"],
                    }
                    lc_documents.append(
                        LangchainDocument(
                            page_content=block_data["text"], metadata=metadata
                        )
                    )

        except Exception as e:
            logger.error(f"Error processing {pdf_filename} page {page_num}: {e}")
            return None

        if not lc_documents:
            return None

        # Restore reading order by the block index embedded in block_id
        lc_documents.sort(
            key=lambda d: int(d.metadata.get("block_id", "0_0").split("_")[-1])
        )

        return {
            "page_num": page_num,
            "content": {
                # -1 marks pages that fall outside any level-1 TOC chapter
                "chapter_id": page_to_chapter_id.get(page_num, -1),
                "blocks": lc_documents,
            },
        }
492
+
493
+ @staticmethod
494
+ def _merge_text_blocks(blocks: list) -> list:
495
+ """Intelligently merges fragmented text blocks into coherent paragraphs."""
496
+ if not blocks:
497
+ return []
498
+ merged_blocks = []
499
+ current_text = ""
500
+ current_bbox = fitz.Rect()
501
+ sentence_enders = {".", "?", "!", "•"}
502
+
503
+ for i, block in enumerate(blocks):
504
+ block_text = block[4].strip()
505
+ if not current_text: # Starting a new paragraph
506
+ current_bbox = fitz.Rect(block[:4])
507
+ current_text = block_text
508
+ else: # Continue existing paragraph
509
+ current_bbox.include_rect(block[:4])
510
+ current_text = f"{current_text} {block_text}"
511
+
512
+ is_last_block = i == len(blocks) - 1
513
+ ends_with_punctuation = block_text.endswith(tuple(sentence_enders))
514
+
515
+ if ends_with_punctuation or is_last_block:
516
+ merged_blocks.append((
517
+ current_bbox.x0,
518
+ current_bbox.y0,
519
+ current_bbox.x1,
520
+ current_bbox.y1,
521
+ current_text,
522
+ len(merged_blocks),
523
+ 0,
524
+ ))
525
+ current_text = ""
526
+ return merged_blocks
527
+
528
+ @staticmethod
529
+ def should_filter_text_block(
530
+ block_text: str,
531
+ block_bbox: fitz.Rect,
532
+ page_height: float,
533
+ filter_size: int,
534
+ ) -> bool:
535
+ """Determines if a text block from a header/footer should be filtered out."""
536
+ is_in_header_area = block_bbox.y0 < (page_height * 0.10)
537
+ is_in_footer_area = block_bbox.y1 > (page_height * 0.80)
538
+ is_short_text = len(block_text) < filter_size
539
+ return (is_in_header_area or is_in_footer_area) and is_short_text
540
+
541
+ @staticmethod
542
+ def find_best_caption_for_figure(
543
+ figure_bbox: fitz.Rect, potential_captions_on_page: list
544
+ ) -> tuple:
545
+ """Finds the best caption for a given figure based on proximity and alignment."""
546
+ best_caption_info = (None, -1)
547
+ min_score = float("inf")
548
+
549
+ for cap_info in potential_captions_on_page:
550
+ cap_bbox = cap_info["bbox"]
551
+ # Heuristic: Score captions directly below the figure
552
+ if cap_bbox.y0 >= figure_bbox.y1 - 10: # Caption starts below the figure
553
+ vertical_dist = cap_bbox.y0 - figure_bbox.y1
554
+ # Calculate horizontal overlap
555
+ overlap_x_start = max(figure_bbox.x0, cap_bbox.x0)
556
+ overlap_x_end = min(figure_bbox.x1, cap_bbox.x1)
557
+ if (
558
+ overlap_x_end - overlap_x_start
559
+ ) > 0: # If they overlap horizontally
560
+ fig_center_x = (figure_bbox.x0 + figure_bbox.x1) / 2
561
+ cap_center_x = (cap_bbox.x0 + cap_bbox.x1) / 2
562
+ horizontal_center_dist = abs(fig_center_x - cap_center_x)
563
+ # Score is a combination of vertical and horizontal distance
564
+ score = vertical_dist + (horizontal_center_dist * 0.5)
565
+ if score < min_score:
566
+ min_score = score
567
+ best_caption_info = (cap_info["text"], cap_info["original_idx"])
568
+ return best_caption_info
backend/rag/model_manager.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2025 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import logging
16
+ import os
17
+ import sys
18
+
19
+ import config
20
+ import nltk
21
+ import stanza
22
+ import torch
23
+ from langchain_community.embeddings import HuggingFaceEmbeddings
24
+
25
+ from .siglip_embedder import CustomSigLipEmbeddings
26
+
27
logger = logging.getLogger(__name__)
# Embedding model is injected via environment so deployments can swap models
# without code changes; load_models() treats None as a fatal misconfiguration.
EMBEDDING_MODEL_ID = os.environ.get("EMBEDDING_MODEL_ID", None)
29
+
30
+
31
class ModelManager:
    """Handles the expensive, one-time setup of downloading and loading all AI models required for RAG."""

    def __init__(self):
        # Model identifiers; the embedder id comes from the environment so it
        # can be swapped per deployment without code changes.
        self.embedding_model_id = EMBEDDING_MODEL_ID
        self.stanza_ner_package = "mimic"
        self.stanza_ner_processor = "i2b2"

    def load_models(self) -> dict:
        """
        Initializes and returns a dictionary of model components.

        Returns a dict with keys "embedder" and "ner_pipeline"; a failed NER
        setup leaves "ner_pipeline" as None, while a failed embedder load is
        fatal and exits the process.
        Note: The main LLM is accessed via API and is NOT loaded here.
        """
        logger.info("--- Initializing RAG-specific Models (Embedder, NER) ---")
        device = "cuda" if torch.cuda.is_available() else "cpu"
        logger.info(f"Using device: {device} for RAG models")

        models = {}

        # 1. Load Embedder (fatal on failure: nothing works without embeddings).
        try:
            if not self.embedding_model_id:
                # Bug fix: previously an unset EMBEDDING_MODEL_ID fell through
                # to the substring check below and raised an opaque
                # TypeError: argument of type 'NoneType' is not iterable.
                raise ValueError("EMBEDDING_MODEL_ID environment variable is not set")
            logger.info(f"Loading embedding model: {self.embedding_model_id}")
            if "siglip" in self.embedding_model_id:
                models["embedder"] = CustomSigLipEmbeddings(
                    siglip_model_name=self.embedding_model_id,
                    device=device,
                    normalize_embeddings=True,
                )
            else:
                models["embedder"] = HuggingFaceEmbeddings(
                    model_name=self.embedding_model_id,
                    model_kwargs={"device": device},
                    encode_kwargs={"normalize_embeddings": True},
                )
            logger.info("✅ Embedding model loaded successfully.")
        except Exception as e:
            logger.error(f"⚠️ Failed to load embedding model: {e}", exc_info=True)
            # Fatal: exit immediately. (The old `models['embedder'] = None`
            # assignment after this call was unreachable and has been removed.)
            sys.exit(1)

        # 2. Load Stanza for NER (non-fatal: the app degrades gracefully).
        try:
            logger.info("Downloading NLTK and Stanza models...")
            stanza.download(
                "en",
                package=self.stanza_ner_package,
                processors={"ner": self.stanza_ner_processor},
                verbose=False,
            )
            logger.info("✅ Stanza models downloaded.")

            logger.info("Loading Stanza NER Pipeline...")
            models["ner_pipeline"] = stanza.Pipeline(
                lang="en",
                package=self.stanza_ner_package,
                # Consistency fix: use the configured processor instead of a
                # second hard-coded "i2b2" literal.
                processors={"ner": self.stanza_ner_processor},
                use_gpu=torch.cuda.is_available(),
                verbose=False,
                tokenize_no_ssplit=True,
            )
            logger.info("✅ Stanza NER Pipeline loaded successfully.")
        except Exception as e:
            logger.error(f"⚠️ Failed to set up Stanza NER pipeline: {e}", exc_info=True)
            models["ner_pipeline"] = None

        if all(models.values()):
            logger.info("\n✅ All RAG-specific models initialized successfully.")
        else:
            logger.error("\n⚠️ One or more RAG models failed to initialize. Check errors above.")

        return models
backend/rag/rag_context_engine.py ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2025 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import logging
16
+ import os
17
+ from typing import List
18
+
19
+ from PIL import Image
20
+ from langchain.docstore.document import Document as LangchainDocument
21
+
22
+ from .knowledge_base import KnowledgeBase
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
def format_context_messages_to_string(context_messages: list[dict]) -> str:
    """Flatten a list of context message dicts into one newline-joined string.

    Only messages with type "text" contribute; image messages are skipped.
    An empty (or falsy) input yields a fixed "no context" sentence.
    """
    if not context_messages:
        return "No relevant context was retrieved from the guideline document."

    text_parts = []
    for message in context_messages:
        if message.get("type") == "text":
            text_parts.append(message.get("text", ""))
    return "\n".join(text_parts)
36
+
37
+
38
+ class RAGContextEngine:
39
+ """Uses a pre-built KnowledgeBase to retrieve and format context for queries."""
40
+
41
+ def __init__(self, knowledge_base: KnowledgeBase, config_overrides: dict | None = None):
42
+ if not isinstance(knowledge_base, KnowledgeBase) or not knowledge_base.retriever:
43
+ raise ValueError("An initialized KnowledgeBase with a built retriever is required.")
44
+ self.kb = knowledge_base
45
+ self.config = self._get_default_config()
46
+ if config_overrides:
47
+ self.config.update(config_overrides)
48
+
49
+ def _get_default_config(self):
50
+ return {
51
+ "FINAL_CONTEXT_TOP_K": 5,
52
+ "CONTEXT_SELECTION_STRATEGY": "chapter_aware_window_expansion",
53
+ "CONTEXT_WINDOW_SIZE": 0,
54
+ "ADD_MAPPED_FIGURES_TO_PROMPT": False,
55
+ }
56
+
57
+ def get_context_messages(self, query_text: str) -> list[dict] | None:
58
+ """Public API to get final, formatted context messages for a long query."""
59
+ final_context_docs = self.retrieve_context_docs(query_text)
60
+ if not final_context_docs:
61
+ logger.warning(f"No relevant context found for query: {query_text}")
62
+ return None
63
+ context_messages, _ = self.build_context_messages(final_context_docs)
64
+ return context_messages
65
+
66
+ def retrieve_context_docs(self, query_text: str) -> list:
67
+ """Handles both short and long queries to retrieve context documents."""
68
+ logger.info(f"Retrieving context documents with query: {query_text}")
69
+ if len(query_text.split()) > 5:
70
+ logger.info("Long query detected. Decomposing into sub-queries...")
71
+ temp_doc = LangchainDocument(page_content=query_text)
72
+ enriched_temp_docs = self.kb.document_enricher([temp_doc], summarize=False)
73
+ query_chunks_as_docs = self.kb.chunker(enriched_docs=enriched_temp_docs, display_results=False)
74
+ sub_queries = list(set([doc.page_content for doc in query_chunks_as_docs]))
75
+ else:
76
+ logger.info("Short query detected. Using direct retrieval.")
77
+ sub_queries = [query_text]
78
+ return self.retrieve_context_docs_for_simple_queries(sub_queries)
79
+
80
+ def get_context_messages_for_simple_queries(self, queries: list[str]) -> list:
81
+ """Retrieves context docs and builds them into formatted messages."""
82
+ final_context_docs = self.retrieve_context_docs_for_simple_queries(queries)
83
+ if not final_context_docs:
84
+ logger.warning(f"No relevant context found for queries: {queries}")
85
+ return []
86
+ context_messages, _ = self.build_context_messages(final_context_docs)
87
+ return context_messages
88
+
89
+ def retrieve_context_docs_for_simple_queries(self, queries: list[str]) -> list:
90
+ """Invokes the retriever for a list of simple queries and selects the final documents."""
91
+ logger.info(f"Retrieving context documents with simple queries: {queries}")
92
+ retrieved_docs = []
93
+ for query in queries:
94
+ docs = self.kb.retriever.invoke(query)
95
+ retrieved_docs.extend(docs)
96
+
97
+ return RAGContextEngine.select_final_context(
98
+ retrieved_docs=retrieved_docs,
99
+ config=self.config,
100
+ page_map=self.kb.page_map,
101
+ )
102
+
103
    def build_context_messages(
        self, docs: List[LangchainDocument]
    ) -> tuple[list[dict], list[Image.Image]]:
        """Builds a structured list of messages by grouping consecutive text blocks.

        Returns (messages, images): messages is a list of {"type": "text"|"image", ...}
        dicts in document order; images collects every PIL image that was inlined
        (empty unless ADD_MAPPED_FIGURES_TO_PROMPT is enabled).
        """
        if not docs:
            return [], []

        context_messages = []
        images_found = []
        # Consecutive prose blocks are buffered and emitted as one text message.
        prose_buffer = []

        def flush_prose_buffer():
            # Emit any accumulated prose as a single message, then reset.
            if prose_buffer:
                full_prose = "\n\n".join(prose_buffer)
                context_messages.append({"type": "text", "text": full_prose})
                prose_buffer.clear()

        add_images = self.config.get("ADD_MAPPED_FIGURES_TO_PROMPT", False)
        for i, doc in enumerate(docs):
            current_page = doc.metadata.get("page_number")
            # A page change (or an inlined figure caption) closes the current
            # prose group so each message stays attributable to one source page.
            is_new_page = (i > 0) and (current_page != docs[i - 1].metadata.get("page_number"))
            is_caption = doc.metadata.get("chunk_type") == "figure-caption"

            if is_new_page or (add_images and is_caption):
                flush_prose_buffer()

            if add_images and is_caption:
                # Caption text (with page attribution) immediately followed by its image.
                source_info = f"--- Source: Page {current_page} ---"
                caption_text = f"{source_info}\n{doc.page_content}"
                context_messages.append({"type": "text", "text": caption_text})
                image_path = doc.metadata.get("linked_figure_path")
                if image_path and os.path.exists(image_path):
                    try:
                        image = Image.open(image_path).convert("RGB")
                        context_messages.append({"type": "image", "image": image})
                        images_found.append(image)
                    except Exception as e:
                        # An unreadable figure degrades to caption-only context.
                        logger.warning(f"Could not load image {image_path}: {e}")
            else:
                # First block of a prose group carries the page header.
                if not prose_buffer:
                    source_info = f"--- Source: Page {current_page} ---"
                    prose_buffer.append(f"\n{source_info}\n")
                prose_buffer.append(doc.page_content)

        # Emit any trailing prose left in the buffer.
        flush_prose_buffer()
        return context_messages, images_found
149
+
150
    @staticmethod
    def select_final_context(retrieved_docs: list, config: dict, page_map: dict) -> list:
        """Selects final context from retrieved documents using the specified strategy.

        Supported CONTEXT_SELECTION_STRATEGY values:
          - "chapter_aware_window_expansion": expand the most-hit pages into
            full pages (plus a window of neighbors within the same chapter)
            pulled from page_map.
          - "rerank_by_frequency": keep the top_k blocks hit most often,
            expanded back to their full original block text.
          - "select_by_rank": first top_k unique chunks in retrieval order,
            expanded to full block text.
        Anything else falls back to the raw top_k retrieved chunks.
        """
        strategy = config.get("CONTEXT_SELECTION_STRATEGY")
        top_k = config.get("FINAL_CONTEXT_TOP_K", 5)

        def _calculate_block_frequencies(docs_list: list) -> list:
            # Group chunks by source block_id and rank blocks by how many
            # retrieved chunks landed in them (a simple relevance vote).
            blocks = {}
            for doc in docs_list:
                if block_id := doc.metadata.get("block_id"):
                    if block_id not in blocks:
                        blocks[block_id] = []
                    blocks[block_id].append(doc)
            return sorted(blocks.items(), key=lambda item: len(item[1]), reverse=True)

        def _expand_chunks_to_blocks(chunks: list) -> list:
            # Swap each chunk's text for the full original block it came from.
            return [
                LangchainDocument(
                    page_content=c.metadata.get("original_block_text", c.page_content),
                    metadata=c.metadata,
                )
                for c in chunks
            ]

        final_context = []
        if strategy == "chapter_aware_window_expansion":
            if not retrieved_docs or not page_map:
                return []

            scored_blocks = _calculate_block_frequencies(retrieved_docs)
            if not scored_blocks:
                # No block_ids available: degrade gracefully to raw top-k expansion.
                return _expand_chunks_to_blocks(retrieved_docs[:top_k])

            # The page holding the most-hit block is emitted first.
            primary_hit_page = scored_blocks[0][1][0].metadata.get("page_number")
            important_pages = {
                c[0].metadata.get("page_number")
                for _, c in scored_blocks[:top_k]
                if c and c[0].metadata.get("page_number")
            }

            # Expand each important page by +/- window_size neighbors, but only
            # while the neighbor stays inside the same chapter.
            window_size = config.get("CONTEXT_WINDOW_SIZE", 0)
            pages_to_extract = set()
            for page_num in important_pages:
                current_chapter_info = page_map.get(page_num)
                if not current_chapter_info:
                    continue
                current_chapter_id = current_chapter_info["chapter_id"]
                pages_to_extract.add(page_num)
                for i in range(1, window_size + 1):
                    if (prev_info := page_map.get(page_num - i)) and prev_info["chapter_id"] == current_chapter_id:
                        pages_to_extract.add(page_num - i)
                    if (next_info := page_map.get(page_num + i)) and next_info["chapter_id"] == current_chapter_id:
                        pages_to_extract.add(page_num + i)

            sorted_pages = sorted(list(pages_to_extract))
            if primary_hit_page and primary_hit_page in page_map:
                final_context.extend(page_map[primary_hit_page]["blocks"])
            for page_num in sorted_pages:
                if page_num != primary_hit_page and page_num in page_map:
                    final_context.extend(page_map[page_num]["blocks"])

        elif strategy == "rerank_by_frequency":
            scored_blocks = _calculate_block_frequencies(retrieved_docs)
            # One representative chunk per block, expanded to the full block text.
            representative_chunks = [chunks[0] for _, chunks in scored_blocks[:top_k]]
            final_context = _expand_chunks_to_blocks(representative_chunks)

        elif strategy == "select_by_rank":
            # De-duplicate by (block_id, content) while preserving insertion
            # (retrieval) order, then keep the first top_k.
            unique_docs_map = {f"{doc.metadata.get('block_id', '')}_{doc.page_content}": doc for doc in retrieved_docs}
            representative_chunks = list(unique_docs_map.values())[:top_k]
            final_context = _expand_chunks_to_blocks(representative_chunks)

        else:
            logger.warning(f"Unknown strategy '{strategy}'. Defaulting to top-k raw chunks.")
            final_context = retrieved_docs[:top_k]

        logger.info(f"Selected {len(final_context)} final context blocks using '{strategy}' strategy.")
        return final_context
backend/rag/siglip_embedder.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2025 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import os
16
+ from typing import List
17
+
18
+ import torch
19
+ import torch.nn.functional as F
20
+ from langchain.embeddings.base import Embeddings
21
+ from transformers import AutoModel, AutoTokenizer
22
+
23
+ HF_TOKEN = os.environ.get("HF_TOKEN", None)
24
+
25
+
26
class CustomSigLipEmbeddings(Embeddings):
    """LangChain-compatible text embedder backed by a SigLIP model.

    Subclassing LangChain's `Embeddings` base class guarantees the two
    methods LangChain vector stores rely on — `embed_documents` and
    `embed_query` — are provided; every vector is optionally L2-normalized.
    """

    def __init__(self, siglip_model_name: str, device: str = "cpu", normalize_embeddings: bool = True):
        super().__init__()
        # Tokenizer and model are fetched from the Hub once, at construction
        # time; HF_TOKEN (if set) authorizes access to gated checkpoints.
        self.tokenizer = AutoTokenizer.from_pretrained(siglip_model_name, token=HF_TOKEN)
        self.model = AutoModel.from_pretrained(siglip_model_name, token=HF_TOKEN).to(device)
        self.device = device
        self.normalize_embeddings = normalize_embeddings

    def _embed(self, texts: List[str]) -> torch.Tensor:
        """Tokenize `texts` and return their (optionally L2-normalized) feature tensor."""
        batch = self.tokenizer(
            texts, padding="max_length", truncation=True, max_length=64, return_tensors="pt"
        ).to(self.device)

        # Inference only — no gradients needed.
        with torch.no_grad():
            features = self.model.get_text_features(**batch)

        return F.normalize(features, p=2, dim=1) if self.normalize_embeddings else features

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Generate normalized embeddings for a list of documents."""
        return self._embed(texts).cpu().numpy().tolist()

    def embed_query(self, text: str) -> List[float]:
        """Generate a normalized embedding for a single query text."""
        return self._embed([text])[0].cpu().numpy().tolist()
backend/requirements.txt ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2025 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ # --- Core Flask Application ---
16
+ flask
17
+ gunicorn
18
+ requests
19
+ pillow
20
+ diskcache
21
+ google-auth
22
+
23
+ # --- RAG: PDF and Text Processing ---
24
+ PyMuPDF # For parsing PDF files (fitz)
25
+ nltk # For NLTKTextSplitter
26
+ tqdm # For progress bars during knowledge base build
27
+
28
+ # --- RAG: LangChain Components ---
29
+ langchain
30
+ langchain_community
31
+ langchain-huggingface
32
+ langchain-text-splitters
33
+ chromadb # Vector store for embeddings
34
+ rank_bm25 # For the BM25 sparse retriever
35
+
36
+ # --- RAG: ML/NLP Models & Frameworks ---
37
+ # User-specified versions for reproducibility
38
+ torch==2.6.0
39
+ numpy==2.0.2
40
+
41
+ # Libraries for loading and running the embedding and NER models
42
+ sentence-transformers
43
+ transformers
44
+ accelerate
45
+ bitsandbytes
46
+ sentencepiece
47
+ stanza # For NER (Named Entity Recognition)
backend/routes.py ADDED
@@ -0,0 +1,218 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2025 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import logging
16
+ import os
17
+ import shutil # For zipping the cache directory
18
+ from dataclasses import asdict
19
+ from functools import wraps
20
+ from pathlib import Path
21
+
22
+ from flask import Blueprint, request, jsonify, current_app, send_from_directory
23
+
24
+ import case_util
25
+ import config
26
+ from background_task_manager import BackgroundTaskManager
27
+ from models import ConversationTurn
28
+
29
+ # Use pathlib to construct the path to the images directory
30
+ # This is more robust than relative string paths.
31
+ IMAGE_DIR = Path(__file__).parent / 'data/images'
32
+
33
+ main_bp = Blueprint('main', __name__)
34
+ logger = logging.getLogger(__name__)
35
+
36
@main_bp.after_request
def log_full_cycle(response):
    """After-request hook: record any non-200 response together with the
    method and path of the request that produced it."""
    if response.status_code == 200:
        # Happy path — nothing worth logging.
        return response
    logger.error(
        f"Request: {request.method} {request.path} | "
        f"Response Status: {response.status}"
    )
    # Flask requires after_request handlers to return the response object.
    return response
49
+
50
@main_bp.route('/api/case/<case_id>/stub', methods=['GET'])
def get_case(case_id):
    """Return the static stub data for a single case as JSON.

    Responds 400 when the case id is unknown.
    """
    available_reports = current_app.config["AVAILABLE_REPORTS"]
    # Single dict lookup instead of an `in` membership test followed by
    # a second .get() on the same key.
    case = available_reports.get(case_id)
    if case is None:
        logger.error(f"Case Id {case_id} does not exist.")
        return jsonify({"error": f"Case Id {case_id} does not exist."}), 400

    return jsonify(asdict(case))
58
+
59
+
60
@main_bp.route('/api/case/stub', methods=['GET'])
def get_cases():
    """Return the stub data of every available case as a JSON array."""
    reports = current_app.config["AVAILABLE_REPORTS"]
    return jsonify([asdict(case) for case in reports.values()])
65
+
66
+
67
def rag_initialization_complete_required(f):
    """Decorator that gates a route until the background RAG setup finishes.

    Returns 500 if the "rag_system" task recorded an error, 503 (with a
    Retry-After hint) while it is still running, and otherwise delegates to
    the wrapped view unchanged.
    """

    @wraps(f)
    def wrapper(*args, **kwargs):
        task_manager: BackgroundTaskManager = current_app.config.get('TASK_MANAGER')

        # Hard failure during startup: surface it as a server error.
        if task_manager.get_error("rag_system"):
            return jsonify({"error": "A critical background task failed. Check application logs."}), 500

        # Still warming up: tell the client to come back later.
        if not task_manager.is_task_done("rag_system"):
            logger.warning("RAG initialization is running..")
            response = jsonify(
                {"status": "initializing", "message": "The system is starting up. Please try again in 60 seconds."})
            response.headers['Retry-After'] = 60
            return response, 503

        return f(*args, **kwargs)

    return wrapper
87
+
88
+
89
@main_bp.route('/api/case/<case_id>/all-questions', methods=['GET'])
@rag_initialization_complete_required
def get_all_questions(case_id):
    """Return every MCQ for a case, serving from cache when possible.

    Flow: (1) return cached questions when caching is enabled and they
    exist; (2) otherwise generate them live via the LLM client, using any
    prefetched RAG guideline context; (3) write the fresh questions back to
    the cache. Answer choices are re-randomized on every response.
    """
    logger.info(f"Retrieve all questions for the given case '{case_id}'")

    cache_manager = current_app.config['DEMO_CACHE']
    # 1. Check the cache first
    if config.USE_CACHE and cache_manager:
        all_mcqs_sequence = cache_manager.get_all_mcqs_sequence(case_id)
        if all_mcqs_sequence:  # idiomatic truthiness instead of len(...) > 0
            logger.info(f"CACHE HIT for case '{case_id}'")
            randomized_choices_mcqs = case_util.randomize_mcqs(all_mcqs_sequence)
            return jsonify([asdict(mcq) for mcq in randomized_choices_mcqs])

    # 2. CACHE MISS: Generate live
    logger.info(
        f"CACHE MISS or cache disabled for case '{case_id}'. Generating live question...")

    llm_client = current_app.config['LLM_CLIENT']
    if not llm_client:
        logger.error(
            "LLM client (REST API) not initialized. Cannot process request.")
        return jsonify({"error": "LLM client not initialized."}), 500

    static_case_info = current_app.config['AVAILABLE_REPORTS'].get(case_id)
    if not static_case_info:
        logger.error(f"Static case data for id {case_id} not found.")
        return jsonify({"error": f"Static case data for id {case_id} not found."}), 404

    rag_cache = current_app.config.get('RAG_CONTEXT_CACHE', {})
    prefetched_data = rag_cache.get(case_id, {})
    guideline_context_string = prefetched_data.get("context_string", "")

    live_generated_mcqs = llm_client.generate_all_questions(
        case_data=asdict(static_case_info),
        guideline_context=guideline_context_string
    )

    # Guard clause instead of `is not None and len(...) > 0` plus else-branch.
    if not live_generated_mcqs:
        logger.error("MCQ Sequence generation failed.")
        return jsonify(
            {"error": "MCQ Sequence generation failed."}), 500

    # 3. WRITE-THROUGH: Update the cache with the new questions if caching is enabled
    if config.USE_CACHE and cache_manager:
        cache_manager.add_all_mcqs_to_case(case_id, live_generated_mcqs)
    randomized_choices_mcqs = case_util.randomize_mcqs(live_generated_mcqs)
    return jsonify([asdict(mcq) for mcq in randomized_choices_mcqs]), 200
138
+
139
+
140
@main_bp.route('/api/case/<case_id>/summarize', methods=['POST'])
@rag_initialization_complete_required
def get_case_summary(case_id):
    """Generate a case summary from the posted conversation history.

    First tries to load a pre-built summary template from the cache; on a
    miss (or with caching disabled) it builds the template on the fly and
    writes it back. The template is then populated with rationale derived
    from the user's conversation turns.

    Request body: JSON with a non-empty 'conversation_history' list.
    Responds 400 on a missing history, 404 on an unknown case id, and 500
    on any internal failure.
    """
    data = request.get_json(force=True)
    conversation_history_data = data.get('conversation_history')
    if not conversation_history_data:
        logger.error(f"Missing 'conversation_history' in request body for case {case_id}.")
        return jsonify({"error": f"Missing 'conversation_history' in request body for case {case_id}."}), 400

    try:
        summary_template = None
        # First, try to get the summary from the cache, if caching is enabled
        cache_manager = current_app.config.get('DEMO_CACHE')
        if cache_manager:
            summary_template = cache_manager.get_summary_template(case_id)
            if summary_template:
                logger.info(f"Summary template for case {case_id} found in cache.")

        # If cache is disabled OR the template was not in the cache, build it now
        if summary_template is None:
            logger.warning(f"Summary template for case {case_id} not in cache or cache disabled. Building on the fly.")
            static_case_info = current_app.config['AVAILABLE_REPORTS'].get(case_id)
            if not static_case_info:
                logger.error(f"Static case data for case {case_id} not found.")
                return jsonify({"error": f"Static case data for case {case_id} not found."}), 404
            summary_template = case_util.build_summary_template(static_case_info,
                                                                current_app.config.get('RAG_CONTEXT_CACHE', {}))
            if cache_manager:
                cache_manager.save_summary_template(case_id, summary_template)

        if summary_template is None:
            logger.error(f"Summary template not found for case {case_id}.")
            # Fixed: was an f-string with no placeholders (ruff F541).
            return jsonify({"error": "An internal error occurred."}), 500

        # Once summary template is ready, we can programmatically populate
        # rationale based on the user's journey.
        conversation_turns = [ConversationTurn.from_dict(turn) for turn in conversation_history_data]
        summary = case_util.populate_rationale(summary_template, conversation_turns)
        return jsonify(asdict(summary)), 200
    except Exception as e:
        logger.error(f"Error generating summary for case {case_id}: {e}", exc_info=True)
        # NOTE(review): echoing the exception text to the client can leak
        # internal details; consider returning a generic message instead.
        return jsonify({"error": f"An internal error occurred: {e}"}), 500
185
+
186
+
187
@main_bp.route('/app/download_cache')
@rag_initialization_complete_required
def download_cache_zip():
    """Zips the cache directory and serves it for download.

    Responds 500 when the cache directory is missing or archiving fails.
    """
    zip_filename = "rad-learn-cache.zip"
    # Create the zip file in a temporary directory.
    # Using /tmp is common in containerized environments.
    temp_dir = "/tmp"
    zip_base_path = os.path.join(temp_dir, "rad-learn-cache")  # shutil adds .zip
    zip_filepath = zip_base_path + ".zip"

    # Ensure the cache directory exists before trying to zip it
    cache_manager = current_app.config.get('DEMO_CACHE')
    cache_directory = cache_manager.cache_directory

    if not os.path.isdir(cache_directory):
        logger.error(f"Cache directory not found at {cache_directory}")
        return jsonify({"error": f"Cache directory not found on server: {cache_directory}"}), 500

    try:
        logger.info(f"Creating zip archive of cache directory: {cache_directory} to {zip_filepath}")
        shutil.make_archive(
            zip_base_path,  # This is the base name, shutil adds the .zip extension
            "zip",
            cache_directory,  # This is the root directory to archive
        )
        logger.info("Zip archive created successfully.")
        # NOTE(review): the archive is NOT deleted after sending — it stays in
        # /tmp and is simply overwritten by make_archive on the next request.
        return send_from_directory(temp_dir, zip_filename, as_attachment=True)
    except Exception as e:
        logger.error(f"Error creating or sending zip archive of cache directory: {e}", exc_info=True)
        return jsonify({"error": f"Error creating or sending zip archive: {e}"}), 500
frontend/index.html ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!--
2
+ # Copyright 2025 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ -->
16
+
17
+ <!doctype html>
18
+ <html lang="en">
19
+ <head>
20
+ <meta charset="UTF-8"/>
21
+ <link href="/vite.svg" rel="icon" type="image/svg+xml"/>
22
+ <meta content="width=device-width, initial-scale=1.0" name="viewport"/>
23
+ <title>Flask + React App</title>
24
+ </head>
25
+ <body>
26
+ <div id="root"></div>
27
+ <script src="/src/main.jsx" type="module"></script>
28
+ </body>
29
+ </html>
frontend/package.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "flask-react-frontend",
3
+ "private": true,
4
+ "version": "0.0.0",
5
+ "type": "module",
6
+ "scripts": {
7
+ "dev": "vite",
8
+ "build": "vite build",
9
+ "preview": "vite preview"
10
+ },
11
+ "dependencies": {
12
+ "react": "^18.2.0",
13
+ "react-dom": "^18.2.0",
14
+ "react-tooltip": "^5.29.1"
15
+ },
16
+ "devDependencies": {
17
+ "@types/react": "^18.2.15",
18
+ "@types/react-dom": "^18.2.7",
19
+ "@vitejs/plugin-react": "^4.0.3",
20
+ "vite": "^4.4.5"
21
+ }
22
+ }
frontend/public/index.html ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!--
2
+ # Copyright 2025 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ -->
16
+
17
+ <!doctype html>
18
+ <html lang="en">
19
+ <head>
20
+ <meta charset="UTF-8"/>
21
+ <link href="/vite.svg" rel="icon" type="image/svg+xml"/>
22
+ <meta content="width=device-width, initial-scale=1.0" name="viewport"/>
23
+ <title>React Multi-Screen App</title>
24
+ <link href="https://fonts.googleapis.com" rel="preconnect">
25
+ <link crossorigin href="https://fonts.gstatic.com" rel="preconnect">
26
+ <link href="https://fonts.googleapis.com/css2?family=Google+Sans:wght@400;500;700&family=Google+Sans+Text:ital,wght@0,400;0,500;0,700;1,500&display=swap"
27
+ rel="stylesheet">
28
+ <link href="https://fonts.googleapis.com/css2?family=Material+Symbols+Outlined:opsz,wght,FILL,GRAD@20..48,100..700,0..1,-50..200"
29
+ rel="stylesheet"/>
30
+ </head>
31
+ <body>
32
+ <div id="root"></div>
33
+ <script src="/src/main.jsx" type="module"></script>
34
+ </body>
35
+ </html>
frontend/public/vite.svg ADDED
frontend/src/App.css ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ # Copyright 2025 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ */
16
+
17
+ :root {
18
+ --font-family-text: 'Google Sans Text', 'Segoe UI', Roboto, sans-serif;
19
+ --font-family-display: 'Google Sans', 'Segoe UI', Roboto, sans-serif;
20
+
21
+ /* Font Sizes */
22
+ --font-size-xs: 12px;
23
+ --font-size-sm: 14px;
24
+ --font-size-md: 16px;
25
+ --font-size-lg: 20px;
26
+ --font-size-xl: 22px;
27
+ --font-size-xxl: 24px;
28
+ --font-size-xxxl: 32px;
29
+
30
+ /* Font Weights */
31
+ --font-weight-regular: 400;
32
+ --font-weight-medium: 500;
33
+ --font-weight-bold: 700;
34
+ }
35
+
36
+ .app-container {
37
+ width: 100%;
38
+ min-height: 100vh;
39
+ display: grid;
40
+ place-items: center;
41
+ background-color: #ffffff;
42
+ color: #1a1d21;
43
+ font-family: var(--font-family-text);
44
+ overflow: auto;
45
+ box-sizing: border-box;
46
+ }
47
+
48
+ .material-symbols-outlined {
49
+ font-family: 'Material Symbols Outlined', sans-serif;
50
+ font-weight: normal;
51
+ font-style: normal;
52
+ font-size: 24px;
53
+ line-height: 1;
54
+ letter-spacing: normal;
55
+ text-transform: none;
56
+ display: inline-block;
57
+ white-space: nowrap;
58
+ word-wrap: normal;
59
+ direction: ltr;
60
+ -webkit-font-feature-settings: 'liga';
61
+ -webkit-font-smoothing: antialiased;
62
+ }
63
+
64
+ .custom-tooltip.react-tooltip {
65
+ max-width: 300px;
66
+ background: linear-gradient(135deg, #2a2f3a, #1e1f26);
67
+ color: #f8f9fa;
68
+ font-family: 'Inter', system-ui, sans-serif;
69
+ font-size: 14px;
70
+ line-height: 1.6;
71
+ padding: 12px 16px;
72
+ border-radius: 10px;
73
+ box-shadow: 0 8px 24px rgba(0, 0, 0, 0.25),
74
+ 0 2px 8px rgba(0, 0, 0, 0.15);
75
+ border: 1px solid rgba(255, 255, 255, 0.05);
76
+ white-space: normal;
77
+ word-wrap: break-word;
78
+ opacity: 1;
79
+ z-index: 9999;
80
+ backdrop-filter: blur(6px);
81
+ transition: opacity 0.2s ease, transform 0.2s ease;
82
+ }
83
+
84
+ .custom-tooltip.react-tooltip [data-popper-arrow] {
85
+ width: 10px;
86
+ height: 10px;
87
+ background: inherit;
88
+ transform: rotate(45deg);
89
+ z-index: -1;
90
+ }
frontend/src/App.jsx ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ # Copyright 2025 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ */
16
+
17
+
18
+ import React, {useState} from 'react';
19
+ import LandingScreen from './screens/LandingScreen';
20
+ import JourneySelectionScreen from './screens/JourneySelectionScreen';
21
+ import ChatScreen from './screens/ChatScreen';
22
+ import SummaryScreen from './screens/SummaryScreen';
23
+ import DetailsOverlay from './components/DetailsOverlay';
24
+ import {Tooltip} from 'react-tooltip'; // ADD THIS
25
+ import 'react-tooltip/dist/react-tooltip.css'; // ADD THIS
26
+
27
// Root component: owns the current screen, the selected journey, the
// per-case image cache, and the visibility of the details overlay, and
// renders the screen matching the current navigation state.
function App() {
  const [currentScreen, setCurrentScreen] = useState('landing');
  const [selectedJourney, setSelectedJourney] = useState(null);
  const [isDetailsOverlayVisible, setIsDetailsOverlayVisible] = useState(false);
  const [caseImagesCache, setCaseImagesCache] = useState({});
  const [summaryData, setSummaryData] = useState(null);

  // Start a journey: remember it, drop any stale summary, go to chat.
  const handleLaunchJourney = (journey) => {
    setSelectedJourney(journey);
    setSummaryData(null);
    setCurrentScreen('chat');
  };

  const handleNavigate = (screen) => {
    setCurrentScreen(screen);
  };

  const handleShowDetails = (show) => {
    setIsDetailsOverlayVisible(show);
  };

  // Memoize a loaded case image by case id so re-entering a screen
  // does not refetch it.
  const updateImageCache = (caseId, imageUrl) => {
    setCaseImagesCache(prevCache => ({
      ...prevCache,
      [caseId]: imageUrl
    }));
  };

  const handleGoToSummary = (data) => {
    setSummaryData(data);
    setCurrentScreen('summary');
  };

  // Pick the screen component for the current navigation state.
  const renderScreen = () => {
    const screenProps = {
      onNavigate: handleNavigate,
      onShowDetails: () => handleShowDetails(true)
    };

    switch (currentScreen) {
      case 'journeySelection':
        return <JourneySelectionScreen {...screenProps} onLaunchJourney={handleLaunchJourney}/>;
      case 'chat':
        return (
          <ChatScreen
            {...screenProps}
            journey={selectedJourney}
            cachedImage={caseImagesCache[selectedJourney?.id]}
            onImageLoad={(imageUrl) => updateImageCache(selectedJourney.id, imageUrl)}
            onGoToSummary={handleGoToSummary}
          />
        );
      case 'summary':
        return (
          <SummaryScreen
            {...screenProps}
            journey={selectedJourney}
            cachedImage={caseImagesCache[selectedJourney?.id]}
            summaryData={summaryData}
          />
        );
      case 'landing':
      default:
        return <LandingScreen
          onStartJourney={() => handleNavigate('journeySelection')}
          onShowDetails={screenProps.onShowDetails}
        />;
    }
  };

  return (
    <div className="app-container">
      {renderScreen()}
      {isDetailsOverlayVisible && <DetailsOverlay onClose={() => handleShowDetails(false)}/>}

      {/* Single app-wide tooltip anchored to any .tooltip-trigger element. */}
      <Tooltip anchorSelect=".tooltip-trigger" className="custom-tooltip" arrow={true}/>
    </div>
  );
}

export default App;
frontend/src/assets/home_chest_logo.jpg ADDED
frontend/src/components/ChatMessage.jsx ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ # Copyright 2025 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ */
16
+
17
+ import React from 'react';
18
+ import styles from './ChatMessage.module.css';
19
+ import IconAstrophotography from '../icons/IconAstrophotography';
20
+ import IconPerson from '../icons/IconPerson';
21
+
22
+ const ChatMessage = ({type, text, children}) => {
23
+ const isUser = type === 'user';
24
+ const wrapperClass = isUser ? styles.userMessageWrapper : styles.systemMessageWrapper;
25
+ const bubbleClass = isUser ? styles.userMessageBox : styles.systemMessageBox;
26
+ const icon = isUser ? <IconPerson className={styles.avatarIcon}/> :
27
+ <IconAstrophotography className={styles.avatarIcon}/>;
28
+
29
+ return (
30
+ <div className={wrapperClass}>
31
+ {!isUser && icon}
32
+ <div className={bubbleClass}>
33
+ {text || children}
34
+ </div>
35
+ {isUser && icon}
36
+ </div>
37
+ );
38
+ };
39
+
40
+ export default ChatMessage;
frontend/src/components/ChatMessage.module.css ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ # Copyright 2025 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ */
16
+
17
+ .systemMessageWrapper, .userMessageWrapper {
18
+ display: flex;
19
+ align-items: flex-start;
20
+ gap: 12px;
21
+ width: 100%;
22
+ }
23
+
24
+ .systemMessageWrapper {
25
+ justify-content: flex-start;
26
+ }
27
+
28
+ .userMessageWrapper {
29
+ justify-content: flex-end;
30
+ }
31
+
32
+ .avatarIcon {
33
+ width: 32px;
34
+ height: 32px;
35
+ flex-shrink: 0;
36
+ border-radius: 50%;
37
+ padding: 4px;
38
+ box-sizing: border-box;
39
+ }
40
+
41
+ .systemMessageWrapper .avatarIcon {
42
+ background-color: #C2E7FF;
43
+ color: #0B57D0;
44
+ }
45
+
46
+ .userMessageWrapper .avatarIcon {
47
+ background-color: #0B57D0;
48
+ color: #D3E3FD;
49
+ }
50
+
51
+ .systemMessageBox, .userMessageBox {
52
+ padding: 12px 16px;
53
+ border-radius: 18px;
54
+ max-width: 80%;
55
+ font-size: var(--font-size-md);
56
+ line-height: 1.5;
57
+ white-space: pre-wrap;
58
+ }
59
+
60
+ .systemMessageBox {
61
+ background-color: #E8F0FE;
62
+ color: #1a1d21;
63
+ border-top-left-radius: 4px;
64
+ }
65
+
66
+ .userMessageBox {
67
+ background-color: #E7E7E7;
68
+ color: #353535;
69
+ border-top-right-radius: 4px;
70
+ }
frontend/src/components/DetailsOverlay.jsx ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ # Copyright 2025 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ */
16
+
17
+ import React from 'react';
18
+ import styles from './DetailsOverlay.module.css';
19
+ import IconClose from '../icons/IconClose';
20
+
21
// Modal dialog with static "about this demo" content. Clicking the dimmed
// backdrop or the close button invokes `onClose`; clicks inside the dialog
// are swallowed so the overlay stays open.
const DetailsOverlay = ({onClose}) => {
  return (
    <div className={styles.dialogOverlay} onClick={onClose} role="dialog" aria-modal="true"
         aria-labelledby="dialog-title">
      {/* stopPropagation keeps inner clicks from reaching the backdrop handler */}
      <div className={styles.dialogBox} onClick={(e) => e.stopPropagation()}>
        <button id="dialog-close-button" className={styles.dialogCloseBtn} aria-label="Close dialog" onClick={onClose}>
          <IconClose/>
        </button>
        <h2 id="dialog-title" className={styles.dialogTitleText}>Details About This Demo</h2>
        <div className={styles.dialogBodyScrollable}>
          <p>
            <b>The Model:</b> This demo features Google's MedGemma-27B, a Gemma 3-based model fine-tuned for
            comprehending medical text and images, specifically Chest X-Rays. It demonstrates MedGemma's ability to
            facilitate the learning process for medical students by advanced interpretation of medical images and
            contextual question generation while leveraging clinical guidelines. Context from clinical guidelines are
            generated using RAG which utilizes Google's MedSigLIP embedding model to build a vector index database.
          </p>
          <p>
            <b>Accessing and Using the Model:</b> Google's MedGemma-27B is available on{' '}
            <a href="https://huggingface.co/google/medgemma-27b-it" target="_blank" rel="noopener noreferrer">
              HuggingFace<img className={styles.inlineLogo}
                              src="https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.svg"
                              alt="Hugging Face Logo"/>
            </a>{' '}
            and{' '}
            <a href="https://console.cloud.google.com/vertex-ai/publishers/google/model-garden/medgemma" target="_blank"
               rel="noopener noreferrer">
              Model Garden <img className={styles.inlineLogo}
                                src="https://www.gstatic.com/cloud/images/icons/apple-icon.png"
                                alt="Model Garden Logo"/>
            </a>.
            Learn more about using the model and its limitations on the{' '}
            <a href="https://developers.google.com/health-ai-developer-foundations?referral=rad_learning_companion"
               target="_blank" rel="noopener noreferrer">
              HAI-DEF developer site
            </a>.
          </p>
          <p>
            <b>Health AI Developer Foundations (HAI-DEF):</b> Provides a collection of open-weight models and companion
            resources to empower developers in building AI models for healthcare.
          </p>
          <p>
            <b>Enjoying the Demo?</b> We'd love your feedback! If you found this demo helpful, please show your
            appreciation by clicking the ❤️ button on the HuggingFace page, linked at the top.
          </p>
          <p>
            <b>Explore More Demos:</b> Discover additional demos on HuggingFace Spaces or via Colabs:
          </p>
          <ul>
            <li>
              <a href="https://huggingface.co/spaces/google/cxr-foundation-demo" target="_blank"
                 rel="noopener noreferrer">
                CXR Foundations Demo <img className={styles.inlineLogo}
                                          src="https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.svg"
                                          alt="Hugging Face Logo"/>
              </a>{' '}
              - Showcases on-browser, data-efficient, and zero-shot classification of CXR images.
            </li>
            <li>
              <a href="https://huggingface.co/spaces/google/path-foundation-demo" target="_blank"
                 rel="noopener noreferrer">
                Path Foundations Demo <img className={styles.inlineLogo}
                                           src="https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.svg"
                                           alt="Hugging Face Logo"/>
              </a>{' '}
              - Highlights on-browser, data-efficient classification and outlier detection within pathology slides.
            </li>
            <li>
              <a href="https://huggingface.co/spaces/google/rad_explain" target="_blank" rel="noopener noreferrer">
                MedGemma Rad Explain <img className={styles.inlineLogo}
                                          src="https://upload.wikimedia.org/wikipedia/commons/thumb/5/58/Echo_link-blue_icon_slanted.svg/1920px-Echo_link-blue_icon_slanted.svg.png"
                                          alt="Link icon"/>
              </a>{' '}
              - Analyzes a radiology report and its corresponding CXR/CT image, generating AI explanations for selected
              sentences with visual context.
            </li>
            <li>
              <a href="https://github.com/Google-Health/medgemma/tree/main/notebooks/fine_tune_with_hugging_face.ipynb"
                 target="_blank" rel="noopener noreferrer">
                Finetune MedGemma Colab <img className={styles.inlineLogo}
                                             src="https://upload.wikimedia.org/wikipedia/commons/d/d0/Google_Colaboratory_SVG_Logo.svg"
                                             alt="Google Colab Logo"/>
              </a>{' '}
              - See an example of how to fine-tune this model.
            </li>
            <li>
              <a href="https://huggingface.co/spaces/google/appoint-ready" target="_blank" rel="noopener noreferrer">
                Simulated Pre-visit Intake <img className={styles.inlineLogo}
                                                src="https://upload.wikimedia.org/wikipedia/commons/thumb/5/58/Echo_link-blue_icon_slanted.svg/1920px-Echo_link-blue_icon_slanted.svg.png"
                                                alt="Link icon"/>
              </a>{' '}
              - Simulates a pre-visit patient dialogue, generating an intelligent intake report with self-evaluated
              insights for efficient provider use.
            </li>
          </ul>
        </div>
      </div>
    </div>
  );
};

export default DetailsOverlay;
frontend/src/components/DetailsOverlay.module.css ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ # Copyright 2025 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ */
16
+
17
+ .dialogOverlay {
18
+ position: fixed;
19
+ inset: 0;
20
+ background-color: rgba(0, 0, 0, 0.6);
21
+ display: flex;
22
+ justify-content: center;
23
+ align-items: center;
24
+ z-index: 1000;
25
+ animation: fadeIn 0.3s ease-out;
26
+ }
27
+
28
+ @keyframes fadeIn {
29
+ from {
30
+ opacity: 0;
31
+ }
32
+ to {
33
+ opacity: 1;
34
+ }
35
+ }
36
+
37
+ .dialogBox {
38
+ background: white;
39
+ border-radius: 12px;
40
+ box-shadow: 0 4px 20px rgba(0, 0, 0, 0.2);
41
+ width: 90%;
42
+ max-width: 600px;
43
+ max-height: 80vh;
44
+ display: flex;
45
+ flex-direction: column;
46
+ position: relative;
47
+ padding: 24px;
48
+ animation: slideUp 0.4s ease-out;
49
+ }
50
+
51
+ @keyframes slideUp {
52
+ from {
53
+ transform: translateY(20px);
54
+ opacity: 0;
55
+ }
56
+ to {
57
+ transform: translateY(0);
58
+ opacity: 1;
59
+ }
60
+ }
61
+
62
+ .dialogCloseBtn {
63
+ position: absolute;
64
+ top: 16px;
65
+ right: 16px;
66
+ background: none;
67
+ border: none;
68
+ cursor: pointer;
69
+ padding: 8px;
70
+ border-radius: 50%;
71
+ display: flex;
72
+ justify-content: center;
73
+ align-items: center;
74
+ transition: background-color 0.2s;
75
+ color: #5f6368;
76
+ }
77
+
78
+ .dialogCloseBtn:hover {
79
+ background-color: #f0f0f0;
80
+ }
81
+
82
+ .dialogTitleText {
83
+ font-family: var(--font-family-display);
84
+ font-size: var(--font-size-xl);
85
+ color: #202124;
86
+ text-align: left;
87
+ margin: 0;
88
+ padding-bottom: 16px;
89
+ border-bottom: 1px solid #e0e0e0;
90
+ }
91
+
92
+ .dialogBodyScrollable {
93
+ overflow-y: auto;
94
+ padding-top: 16px;
95
+ padding-right: 16px;
96
+ text-align: left;
97
+ }
98
+
99
+ .dialogBodyScrollable p {
100
+ font-size: var(--font-size-sm);
101
+ line-height: 1.6;
102
+ margin-bottom: 16px;
103
+ color: #444746;
104
+ }
105
+
106
+ .dialogBodyScrollable b {
107
+ font-weight: var(--font-weight-bold);
108
+ color: #202124;
109
+ }
110
+
111
+ .dialogBodyScrollable a {
112
+ color: #0B57D0;
113
+ text-decoration: underline;
114
+ font-weight: var(--font-weight-medium);
115
+ display: inline-flex;
116
+ align-items: center;
117
+ gap: 4px;
118
+ }
119
+
120
+ .dialogBodyScrollable a:hover {
121
+ text-decoration: underline;
122
+ }
123
+
124
+ .inlineLogo {
125
+ height: 16px;
126
+ width: auto;
127
+ vertical-align: middle;
128
+ margin-left: 2px;
129
+ }
130
+
131
+ .dialogBodyScrollable ul {
132
+ list-style-type: disc;
133
+ padding-left: 20px;
134
+ margin-top: -10px;
135
+ }
136
+
137
+ .dialogBodyScrollable li {
138
+ margin-bottom: 12px;
139
+ padding-left: 4px;
140
+ font-size: var(--font-size-sm);
141
+ line-height: 1.6;
142
+ color: #444746;
143
+ }
frontend/src/components/JourneyCard.jsx ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ # Copyright 2025 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ */
16
+
17
+ import React from 'react';
18
+ import styles from './JourneyCard.module.css';
19
+ import IconArticlePerson from '../icons/IconArticlePerson';
20
+
21
+ const JourneyCard = ({journey, onLaunch}) => {
22
+ return (
23
+ <div className={styles.card}>
24
+ <img src={journey.imageUrl} alt={journey.label} className={styles.cardImage}/>
25
+ <div className={styles.cardFooter}>
26
+ <span className={styles.label}>{journey.label}</span>
27
+ <button className={styles.launchButton} onClick={onLaunch}>
28
+ <IconArticlePerson className={styles.buttonIcon}/>
29
+ Launch
30
+ </button>
31
+ </div>
32
+ </div>
33
+ );
34
+ };
35
+
36
+ export default JourneyCard;
frontend/src/components/JourneyCard.module.css ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ # Copyright 2025 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ */
16
+
17
+ .card {
18
+ width: 100%;
19
+ max-width: 452px;
20
+ background: white;
21
+ border-radius: 14px;
22
+ border: 2px #E9E9E9 solid;
23
+ display: flex;
24
+ flex-direction: column;
25
+ justify-content: space-between;
26
+ transition: box-shadow 0.3s ease;
27
+ }
28
+
29
+ .card:hover {
30
+ box-shadow: 0 8px 16px rgba(0, 0, 0, 0.1);
31
+ }
32
+
33
+ .cardImage {
34
+ width: 100%;
35
+ aspect-ratio: 1 / 1;
36
+ object-fit: contain;
37
+ }
38
+
39
+ .cardFooter {
40
+ display: flex;
41
+ justify-content: space-between;
42
+ align-items: center;
43
+ padding: 16px 24px;
44
+ background-color: white;
45
+ border-top: 2px #E9E9E9 solid;
46
+ }
47
+
48
+ .label {
49
+ color: black;
50
+ font-size: var(--font-size-md);
51
+ font-weight: var(--font-weight-medium);
52
+ line-height: 24px;
53
+ }
54
+
55
+ .launchButton {
56
+ display: flex;
57
+ align-items: center;
58
+ justify-content: center;
59
+ gap: 8px;
60
+ padding: 8px 16px;
61
+ border-radius: 100px;
62
+ background: #0B57D0;
63
+ color: white;
64
+ border: none;
65
+ font-size: var(--font-size-md);
66
+ font-weight: var(--font-weight-medium);
67
+ font-family: var(--font-family-text);
68
+ cursor: pointer;
69
+ transition: background-color 0.2s;
70
+ }
71
+
72
+ .launchButton:hover {
73
+ background: #0a4ab5;
74
+ }
75
+
76
+ .buttonIcon {
77
+ width: 20px;
78
+ height: 20px;
79
+ fill: white;
80
+ }
frontend/src/components/MCQOption.jsx ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ # Copyright 2025 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ */
16
+
17
+ import React from 'react';
18
+ import IconRadioButton from '../icons/IconRadioButton';
19
+ import styles from './MCQOption.module.css';
20
+ import TextWithTooltips from './TextWithTooltips';
21
+
22
+ const MCQOption = ({text, onClick, disabled, isSelected, isIncorrect}) => {
23
+ const buttonClasses = [
24
+ styles.optionButton,
25
+ isSelected ? styles.selected : '',
26
+ isIncorrect ? styles.incorrect : ''
27
+ ].join(' ');
28
+
29
+ return (
30
+ <button
31
+ className={buttonClasses}
32
+ onClick={onClick}
33
+ disabled={disabled}
34
+ >
35
+ <IconRadioButton/>
36
+ <span><TextWithTooltips text={text}/></span>
37
+ </button>
38
+ );
39
+ };
40
+
41
+ export default MCQOption;
frontend/src/components/MCQOption.module.css ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ # Copyright 2025 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ */
16
+
17
+ .optionButton {
18
+ display: flex;
19
+ align-items: center;
20
+ gap: 12px;
21
+ width: 100%;
22
+ padding: 12px;
23
+ background-color: transparent;
24
+ border: 1px solid #E0E0E0;
25
+ border-radius: 8px;
26
+ text-align: left;
27
+ font-size: var(--font-size-md);
28
+ font-family: var(--font-family-text);
29
+ color: #333;
30
+ cursor: pointer;
31
+ transition: background-color 0.2s, border-color 0.2s;
32
+ }
33
+
34
+ .optionButton:hover:not(:disabled) {
35
+ background-color: #f0f2f5;
36
+ border-color: #0B57D0;
37
+ }
38
+
39
+ .optionButton:disabled {
40
+ cursor: not-allowed;
41
+ opacity: 0.7;
42
+ }
43
+
44
+ .optionButton.selected {
45
+ border-color: #0B57D0;
46
+ background-color: #e8f0fe;
47
+ }
48
+
49
+ .optionButton.incorrect {
50
+ background-color: #F8D7DA;
51
+ border-color: #F5C6CB;
52
+ color: #721C24;
53
+ text-decoration: line-through;
54
+ cursor: not-allowed;
55
+ opacity: 1;
56
+ }
frontend/src/components/RedactedTextView.js ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ # Copyright 2025 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ */
16
+
17
+ /**
18
+ * Replaces specified phrases (multi-word strings) in a text string with '__?___'.
19
+ * @param {string} inputText The text to process.
20
+ * @param {string[]} phrasesToRedact An array of phrases to be replaced.
21
+ * @returns {string} The text with specified phrases redacted.
22
+ */
23
+ export const redactPhrases = (inputText, phrasesToRedact) => {
24
+ if (!inputText || !phrasesToRedact || phrasesToRedact.length === 0) {
25
+ return inputText || "";
26
+ }
27
+
28
+ let processedText = inputText;
29
+
30
+ // Sort phrases by length (descending) to redact longer phrases first.
31
+ // This prevents issues where a shorter phrase is part of a longer one.
32
+ const sortedPhrases = phrasesToRedact.sort((a, b) => b.length - a.length);
33
+
34
+ sortedPhrases.forEach(phrase => {
35
+ // Create a global, case-insensitive regex for the current phrase.
36
+ const regex = new RegExp(phrase, 'gi');
37
+ // Replace the found phrase with 'X's of the same length.
38
+ processedText = processedText.replace(regex, '__?__');
39
+ });
40
+
41
+ return processedText;
42
+ };
frontend/src/components/TextWithTooltips.jsx ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ # Copyright 2025 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ */
16
+
17
+ import React from 'react';
18
+ import {medicalTerms} from '../data/medicalTerms';
19
+
20
+ const TextWithTooltips = ({text}) => {
21
+ const sortedKeys = Object.keys(medicalTerms).sort((a, b) => b.length - a.length);
22
+ const regex = new RegExp(`(${sortedKeys.join('|')})`, 'gi');
23
+ const parts = text.split(regex);
24
+
25
+ return (
26
+ <>
27
+ {parts.map((part, index) => {
28
+ const lowerCasePart = part.toLowerCase();
29
+ if (medicalTerms[lowerCasePart]) {
30
+ return (
31
+ <span
32
+ key={index}
33
+ className="tooltip-trigger"
34
+ data-tooltip-content={medicalTerms[lowerCasePart]}
35
+ style={{borderBottom: '1px dotted'}}
36
+ >
37
+ {part}
38
+ </span>
39
+ );
40
+ }
41
+ return part;
42
+ })}
43
+ </>
44
+ );
45
+ };
46
+
47
+ export default TextWithTooltips;
frontend/src/data/constants.js ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ # Copyright 2025 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ */
16
+
17
+ export const CXR_14_CITATION = `The images are from the NIH Chest X-Ray dataset, which is available for download at: https://nihcc.app.box.com/v/ChestXray-NIHCC. The dataset was provided by the NIH Clinical Center. The following paper provides a detailed description of the dataset:
18
+
19
+ Xiaosong Wang, Yifan Peng, Le Lu, Zhiyong Lu, Mohammadhadi Bagheri, Ronald Summers, ChestX-ray8: Hospital-scale Chest X-ray Database and Benchmarks on Weakly-Supervised Classification and Localization of Common Thorax Diseases, IEEE CVPR, pp. 3462-3471, 2017.`
20
+
21
+ export const CONDITION_TERMS = ["Pleural Effusion", "Cardiomegaly",
22
+ "Cardiomegally", "Atelectasis", "Pneumonia", "Pneumothorax", "Edema",
23
+ "Emphysema", "Fibrosis", "Pleural Thickening", "Hernia"]
frontend/src/data/medicalTerms.js ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ # Copyright 2025 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ */
16
+
17
+ export const medicalTerms = {
18
+ "pleural line": "A very thin, fine white line on an X-Ray that shows the actual edge of the lung, often seen when the lung is separated from the chest wall due to underlying abnormality.",
19
+ "effusion": "A general term for fluid buildup. On a Chest X-Ray, this fluid appears white and often settles in the lowest parts of the chest due to gravity.",
20
+ "costophrenic angle": "The sharp, pointed corner at the very bottom of each lung seen on an X-Ray, located where your diaphragm (breathing muscle) meets your ribs.",
21
+ "mediastinum": "The central area in the chest that sits between the lungs. On an X-Ray, this column contains the heart, windpipe, and major blood vessels.",
22
+ "mediastinal": "An adjective describing anything located within the mediastinum, which is the central compartment of the chest that separates the right and left lungs.",
23
+ "hemithorax": "On a Chest X-Ray (CXR), hemithorax refers to either the right or left half of the chest cavity, encompassing the lung and surrounding structures.",
24
+ "catheter": "A thin, flexible tube that shows up as a distinct line on an X-Ray. It is placed in the body to deliver fluids or for monitoring.",
25
+ "meniscus sign": "A specific crescent or U-shape that fluid creates on a Chest X-Ray as it appears to climb up the side of the lung, like water in a glass.",
26
+ "meniscus": "A curved, half moon shape that appears on an X-Ray generally indicating underlying fluid collection or a mass.",
27
+ "lung apex": "The very top, rounded part of the lung. On a Chest X-Ray, this is the portion of the lung that is visible above the collarbone.",
28
+ "pleural space": "The potential thin gap between the outer surface of the lung and the inner chest wall. This space is visible on an X-Ray only when it’s abnormal.",
29
+ "cardiac": "An adjective meaning 'related to the heart.' In X-Rays, it's used to describe the size, shape, and outline of the heart's shadow.",
30
+ "thoracic": "An adjective meaning 'related to the chest.' It refers to all structures within the chest cavity, including the ribs, heart, and lungs shown on the X-ray.",
31
+ "radiopaque": "Describes substances that block X-Rays and therefore appear white or light on an image. Example - Bone, metal, and contrast dyes are all radiopaque."
32
+ };
frontend/src/icons/IconArticlePerson.jsx ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ # Copyright 2025 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ */
16
+
17
+ import React from 'react';
18
+
19
+ const IconArticlePerson = ({className, ...props}) => {
20
+ return (
21
+ <svg
22
+ width="20"
23
+ height="20"
24
+ viewBox="14 16 17 17"
25
+ fill="currentColor"
26
+ xmlns="http://www.w3.org/2000/svg"
27
+ className={className}
28
+ {...props}
29
+ preserveAspectRatio="xMidYMid meet"
30
+ >
31
+ <path
32
+ d="M18 21.5H26V20H18V21.5ZM16.5 31C16.0833 31 15.7292 30.8542 15.4375 30.5625C15.1458 30.2708 15 29.9167 15 29.5V18.5C15 18.0833 15.1458 17.7292 15.4375 17.4375C15.7292 17.1458 16.0833 17 16.5 17H27.5C27.9167 17 28.2708 17.1458 28.5625 17.4375C28.8542 17.7292 29 18.0833 29 18.5V22.5417C28.6667 22.2083 28.2847 21.9514 27.8542 21.7708C27.4236 21.5903 26.9722 21.5 26.5 21.5C25.875 21.5 25.2917 21.6597 24.75 21.9792C24.2083 22.2847 23.7778 22.7083 23.4583 23.25H18V24.75H23C22.9722 25.0556 22.9861 25.3542 23.0417 25.6458C23.1111 25.9375 23.2083 26.2222 23.3333 26.5H18V28H22.3333C22.0694 28.25 21.8611 28.5417 21.7083 28.875C21.5694 29.1944 21.5 29.5347 21.5 29.8958V31H16.5ZM23 31V29.8958C23 29.7431 23.0278 29.6042 23.0833 29.4792C23.1389 29.3403 23.2222 29.2222 23.3333 29.125C23.7778 28.75 24.2708 28.4722 24.8125 28.2917C25.3542 28.0972 25.9167 28 26.5 28C27.0833 28 27.6458 28.0972 28.1875 28.2917C28.7292 28.4722 29.2222 28.75 29.6667 29.125C29.7778 29.2222 29.8611 29.3403 29.9167 29.4792C29.9722 29.6042 30 29.7431 30 29.8958V31H23ZM26.5 27C25.9444 27 25.4722 26.8056 25.0833 26.4167C24.6944 26.0278 24.5 25.5556 24.5 25C24.5 24.4444 24.6944 23.9722 25.0833 23.5833C25.4722 23.1944 25.9444 23 26.5 23C27.0556 23 27.5278 23.1944 27.9167 23.5833C28.3056 23.9722 28.5 24.4444 28.5 25C28.5 25.5556 28.3056 26.0278 27.9167 26.4167C27.5278 26.8056 27.0556 27 26.5 27Z"
33
+ />
34
+ </svg>
35
+ );
36
+ };
37
+
38
+ export default IconArticlePerson;
frontend/src/icons/IconAstrophotography.jsx ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ # Copyright 2025 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ */
16
+
17
+
18
+ import React from 'react';
19
+
20
+ const IconAstrophotography = ({className, ...props}) => {
21
+ return (
22
+ <svg width="24" height="24" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg"
23
+ className={className} {...props}>
24
+ <path
25
+ d="M19 9L17.75 6.25L15 5L17.75 3.75L19 0.999999L20.25 3.75L23 5L20.25 6.25L19 9ZM19 23L17.75 20.25L15 19L17.75 17.75L19 15L20.25 17.75L23 19L20.25 20.25L19 23ZM10 20L7.5 14.5L2 12L7.5 9.5L10 4L12.5 9.5L18 12L12.5 14.5L10 20ZM10 15.15L11 13L13.15 12L11 11L10 8.85L9 11L6.85 12L9 13L10 15.15Z"
26
+ fill="#0B57D0"/>
27
+ </svg>
28
+ );
29
+ };
30
+
31
+ export default IconAstrophotography;
frontend/src/icons/IconBackArrow.jsx ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ # Copyright 2025 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ */
16
+
17
+ import React from 'react';
18
+
19
+ const IconBackArrow = ({className, ...props}) => {
20
+ return (
21
+ <svg
22
+ width="20"
23
+ height="20"
24
+ viewBox="0 0 24 24"
25
+ fill="currentColor"
26
+ xmlns="http://www.w3.org/2000/svg"
27
+ className={className}
28
+ {...props}
29
+ >
30
+ <path d="M15.41 7.41L14 6L8 12L14 18L15.41 16.59L10.83 12L15.41 7.41Z"/>
31
+ </svg>
32
+ );
33
+ };
34
+
35
+ export default IconBackArrow;
frontend/src/icons/IconClose.jsx ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ # Copyright 2025 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ */
16
+
17
+
18
+ import React from 'react';
19
+
20
+ const IconClose = ({className}) => (
21
+ <svg className={className} width="24" height="24" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg">
22
+ <path
23
+ d="M19 6.41L17.59 5L12 10.59L6.41 5L5 6.41L10.59 12L5 17.59L6.41 19L12 13.41L17.59 19L19 17.59L13.41 12L19 6.41Z"
24
+ fill="currentColor"/>
25
+ </svg>
26
+ );
27
+
28
+ export default IconClose;
frontend/src/icons/IconCodeBlocks.jsx ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ # Copyright 2025 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ */
16
+
17
+
18
+ import React from 'react';
19
+
20
+ const IconCodeBlocks = ({className, ...props}) => {
21
+ return (
22
+ <svg
23
+ width="20"
24
+ height="20"
25
+ viewBox="0 0 20 20"
26
+ fill="currentColor"
27
+ xmlns="http://www.w3.org/2000/svg"
28
+ className={className}
29
+ {...props}
30
+ >
31
+ <path
32
+ d="M8 12.5L9.0625 11.4375L7.625 10L9.0625 8.5625L8 7.5L5.5 10L8 12.5ZM12 12.5L14.5 10L12 7.5L10.9375 8.5625L12.375 10L10.9375 11.4375L12 12.5ZM4.5 17C4.08333 17 3.72917 16.8542 3.4375 16.5625C3.14583 16.2708 3 15.9167 3 15.5V4.5C3 4.08333 3.14583 3.72917 3.4375 3.4375C3.72917 3.14583 4.08333 3 4.5 3H15.5C15.9167 3 16.2708 3.14583 16.5625 3.4375C16.8542 3.72917 17 4.08333 17 4.5V15.5C17 15.9167 16.8542 16.2708 16.5625 16.5625C16.2708 16.8542 15.9167 17 15.5 17H4.5Z"/>
33
+ </svg>
34
+ );
35
+ };
36
+
37
+ export default IconCodeBlocks;
frontend/src/icons/IconGemma.jsx ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ # Copyright 2025 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ */
16
+
17
+
18
+ import React from 'react';
19
+
20
// Decorative "Gemma" wordmark rendered as an inline SVG. The lettering is a
// single pre-traced <path> (presumably exported from a design tool), filled
// with a linear gradient and clipped by a luminance mask.
//
// `aria-hidden="true"` keeps the graphic out of the accessibility tree — it
// is purely visual. Size/position it from the caller via `className`.
const IconGemma = ({className}) => (
  <svg
    width="107"
    height="25"
    viewBox="60 0 110 25"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    className={className}
    aria-hidden="true"
  >
    {/* Luminance mask restricting the artwork to a 208x25 strip. */}
    <mask id="mask0_7159_1111" style={{maskType: 'luminance'}} maskUnits="userSpaceOnUse" x="-14" y="0" width="208"
          height="25">
      <path d="M193.588 0H-13.2987V24.6199H193.588V0Z" fill="white"/>
    </mask>
    <g mask="url(#mask0_7159_1111)">
      {/* NOTE(review): the path data below was re-joined from display-wrapped
          diff output — verify it byte-for-byte against the committed file. */}
      <path
        d="M72.5851 23.7369C70.9769 23.7369 69.452 23.4391 68.0106 22.8474C66.5929 22.2558 65.3421 21.4179 64.262 20.3378C63.1819 19.2577 62.3361 17.9989 61.7206 16.5575C61.1051 15.116 60.7993 13.5514 60.7993 11.8558C60.7993 10.1602 61.1051 8.59568 61.7206 7.15422C62.3361 5.71276 63.1819 4.45397 64.262 3.37387C65.3421 2.29377 66.5929 1.4559 68.0106 0.864226C69.452 0.272553 70.9769 -0.0252686 72.5851 -0.0252686C74.1934 -0.0252686 75.8254 0.272553 77.2867 0.864226C78.7679 1.4559 79.987 2.29377 80.94 3.37387L79.034 5.27993C78.5455 4.68826 77.9658 4.18792 77.2867 3.78685C76.6077 3.38578 75.877 3.07605 75.0948 2.86559C74.3125 2.65513 73.4865 2.54791 72.6169 2.54791C71.4097 2.54791 70.2542 2.77029 69.1542 3.21503C68.0543 3.63993 67.0774 4.26337 66.2316 5.08932C65.4056 5.89543 64.7504 6.86831 64.262 8.01195C63.7736 9.15559 63.5313 10.4382 63.5313 11.8558C63.5313 13.2735 63.7736 14.5561 64.262 15.6997C64.7703 16.8434 65.4493 17.8282 66.2951 18.6541C67.1409 19.4602 68.1059 20.0837 69.186 20.5284C70.2859 20.9533 71.4296 21.1638 72.6169 21.1638C73.6334 21.1638 74.6182 21.0248 75.5713 20.7508C76.5442 20.453 77.4138 20.0082 78.1762 19.4165C78.9585 18.801 79.6058 18.0188 80.1141 17.0657C80.6223 16.1127 80.9281 14.9691 81.0353 13.6348H72.6487V11.1252H83.5767C83.6204 11.3992 83.6522 11.6652 83.672 11.9194C83.7157 12.1735 83.7356 12.4475 83.7356 12.7453V12.7771C83.7356 14.4092 83.4616 15.9022 82.9096 17.2563C82.3576 18.5906 81.5873 19.7461 80.5906 20.719C79.5939 21.672 78.4185 22.4146 77.0644 22.9427C75.7103 23.4709 74.2172 23.7369 72.5851 23.7369ZM92.9005 23.7369C91.332 23.7369 89.9342 23.3676 88.7072 22.6251C87.5 21.8825 86.547 20.8659 85.8481 19.5754C85.1691 18.2848 84.8316 16.8116 84.8316 15.1597C84.8316 13.615 85.1492 12.1854 85.7846 10.871C86.4398 9.55665 87.3531 8.50832 88.5166 7.72604C89.7039 6.91994 91.0898 6.51887 92.6782 6.51887C94.2665 6.51887 95.6643 6.88023 96.808 7.59897C97.9714 8.29786 98.8609 9.27074 99.4764 10.5216C100.112 11.7724 100.429 13.202 100.429 14.9572C100.418 15.1081 100.398 15.255 100.398 15.4019C100.398 15.4019 100.386 15.529 100.366 15.6362H87.5318C87.5953 16.7997 87.8614 17.7845 88.326 18.5906C88.8541 19.4999 89.5332 20.179 90.3591 20.6237C91.2049 21.0685 92.0865 21.2908 92.9958 21.2908C94.1832 21.2908 95.156 21.0168 95.9185 20.4649C96.7007 19.8931 97.3282 19.1942 97.7928 18.3682L100.08 19.4801C99.4447 20.7071 98.5353 21.7237 97.348 22.5298C96.1607 23.3359 94.6795 23.7369 92.9005 23.7369ZM87.6906 13.4125H97.5704C97.5505 12.9677 97.4433 12.4912 97.2527 11.9829C97.082 11.4548 96.796 10.9663 96.395 10.5216C96.0138 10.057 95.5174 9.68372 94.9019 9.40973C94.3102 9.1119 93.5677 8.96498 92.6782 8.96498C91.6179 8.96498 90.6966 9.23897 89.9144 9.79094C89.1519 10.3191 88.5682 11.0497 88.1672 11.9829C87.9567 12.4277 87.7979 12.9042 87.6906 13.4125ZM102.002 23.2287V7.02715H104.575V9.40973H104.702C105 8.88159 105.401 8.40507 105.909 7.98018C106.438 7.53543 107.033 7.18599 107.688 6.93185C108.367 6.65785 109.054 6.51887 109.753 6.51887C110.96 6.51887 111.997 6.81669 112.867 7.40836C113.736 7.98018 114.36 8.73069 114.741 9.66387C115.293 8.75452 116.031 8.00004 116.965 7.40836C117.898 6.81669 119.01 6.51887 120.3 6.51887C122.226 6.51887 123.648 7.1026 124.557 8.26609C125.466 9.42958 125.923 10.9544 125.923 12.8406V23.2287H123.255V13.2536C123.255 11.6851 122.937 10.5851 122.301 9.94978C121.666 9.29457 120.765 8.96498 119.601 8.96498C118.775 8.96498 118.033 9.20721 117.378 9.69563C116.742 10.1602 116.234 10.7876 115.853 11.5699C115.491 12.3324 115.313 13.1702 115.313 14.0796V23.2287H112.612V13.2854C112.612 11.7169 112.295 10.605 111.659 9.94978C111.024 9.29457 110.134 8.96498 108.991 8.96498C108.165 8.96498 107.422 9.20721 106.767 9.69563C106.132 10.1841 105.623 10.8194 105.242 11.6017C104.881 12.384 104.702 13.2338 104.702 14.1431V23.2287H102.002ZM128.21 23.2287V7.02715H130.783V9.40973H130.911C131.208 8.88159 131.609 8.40507 132.118 7.98018C132.646 7.53543 133.241 7.18599 133.897 6.93185C134.576 6.65785 135.263 6.51887 135.962 6.51887C137.169 6.51887 138.205 6.81669 139.075 7.40836C139.944 7.98018 140.568 8.73069 140.949 9.66387C141.501 8.75452 142.24 8.00004 143.173 7.40836C144.106 6.81669 145.218 6.51887 146.508 6.51887C148.434 6.51887 149.856 7.1026 150.765 8.26609C151.675 9.42958 152.131 10.9544 152.131 12.8406V23.2287H149.463V13.2536C149.463 11.6851 149.145 10.5851 148.51 9.94978C147.874 9.29457 146.973 8.96498 145.81 8.96498C144.984 8.96498 144.241 9.20721 143.586 9.69563C142.95 10.1602 142.442 10.7876 142.061 11.5699C141.7 12.3324 141.521 13.1702 141.521 14.0796V23.2287H138.821V13.2854C138.821 11.7169 138.503 10.605 137.868 9.94978C137.232 9.29457 136.343 8.96498 135.199 8.96498C134.373 8.96498 133.631 9.20721 132.975 9.69563C132.34 10.1841 131.832 10.8194 131.451 11.6017C131.089 12.384 130.911 13.2338 130.911 14.1431V23.2287H128.21ZM159.442 23.7369C158.235 23.7369 157.174 23.5027 156.265 23.0381C155.356 22.5735 154.633 21.9381 154.105 21.132C153.597 20.306 153.342 19.3729 153.342 18.3364C153.342 17.1491 153.648 16.1564 154.264 15.3503C154.879 14.5243 155.705 13.9088 156.742 13.5078C157.778 13.0829 158.922 12.8724 160.173 12.8724C160.891 12.8724 161.558 12.9359 162.174 13.063C162.789 13.1702 163.318 13.3052 163.762 13.476C164.227 13.6229 164.576 13.7738 164.811 13.9207V12.9359C164.811 11.7089 164.378 10.7321 163.508 10.0133C162.638 9.29457 161.582 8.93321 160.331 8.93321C159.442 8.93321 158.604 9.13573 157.822 9.5368C157.059 9.91801 156.456 10.4581 156.011 11.1569L153.978 9.6321C154.403 8.99675 154.931 8.44478 155.566 7.98018C156.202 7.51558 156.92 7.15422 157.726 6.90008C158.552 6.64594 159.422 6.51887 160.331 6.51887C162.535 6.51887 164.259 7.1026 165.509 8.26609C166.76 9.42958 167.384 10.9981 167.384 12.9677V23.2287H164.811V20.9096H164.684C164.41 21.3742 164.016 21.8309 163.508 22.2756C163 22.7005 162.396 23.05 161.697 23.324C161.018 23.598 160.268 23.7369 159.442 23.7369ZM159.696 21.3544C160.629 21.3544 161.475 21.1201 162.237 20.6555C163.02 20.1909 163.647 19.5635 164.112 18.7812C164.576 17.9989 164.811 17.1412 164.811 16.208C164.322 15.8705 163.711 15.5925 162.968 15.382C162.249 15.1716 161.455 15.0644 160.585 15.0644C159.041 15.0644 157.905 15.382 157.186 16.0174C156.468 16.6528 156.106 17.435 156.106 18.3682C156.106 19.3014 156.444 19.9764 157.123 20.5284C157.802 21.0804 158.66 21.3544 159.696 21.3544Z"
        fill="url(#paint0_linear_7159_1111)"/>
    </g>
    <defs>
      {/* Blue linear gradient that tints the lettering. */}
      <linearGradient id="paint0_linear_7159_1111" x1="112.14" y1="29.0382" x2="116.119" y2="4.52148"
                      gradientUnits="userSpaceOnUse">
        <stop stopColor="#3B6BFF"/>
        <stop offset="0.59" stopColor="#2E96FF"/>
        <stop offset="1" stopColor="#ACB7FF"/>
      </linearGradient>
    </defs>
  </svg>
);

export default IconGemma;