chandru1652 commited on
Commit
81cdd5f
·
1 Parent(s): fdee41f

Initial public commit

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +4 -0
  2. .gitignore +6 -0
  3. Dockerfile +90 -0
  4. README.md +15 -5
  5. backend/app.py +143 -0
  6. backend/background_task_manager.py +76 -0
  7. backend/cache_manager.py +67 -0
  8. backend/case_util.py +223 -0
  9. backend/config.py +38 -0
  10. backend/data/reports/1.txt +1 -0
  11. backend/data/reports/2.txt +1 -0
  12. backend/data/reports_manifest.csv +3 -0
  13. backend/data/who_chestxray_guideline_9241546778_eng.pdf +3 -0
  14. backend/default_cache/README.md +1 -0
  15. backend/default_cache/rad-learn-cache.zip +3 -0
  16. backend/llm_client.py +291 -0
  17. backend/models.py +108 -0
  18. backend/prompts.py +165 -0
  19. backend/rag/__init__.py +0 -0
  20. backend/rag/knowledge_base.py +568 -0
  21. backend/rag/model_manager.py +102 -0
  22. backend/rag/rag_context_engine.py +226 -0
  23. backend/rag/siglip_embedder.py +59 -0
  24. backend/requirements.txt +47 -0
  25. backend/routes.py +218 -0
  26. frontend/index.html +29 -0
  27. frontend/package.json +22 -0
  28. frontend/public/index.html +35 -0
  29. frontend/public/vite.svg +0 -0
  30. frontend/src/App.css +90 -0
  31. frontend/src/App.jsx +107 -0
  32. frontend/src/assets/home_chest_logo.jpg +0 -0
  33. frontend/src/components/ChatMessage.jsx +40 -0
  34. frontend/src/components/ChatMessage.module.css +70 -0
  35. frontend/src/components/DetailsOverlay.jsx +122 -0
  36. frontend/src/components/DetailsOverlay.module.css +143 -0
  37. frontend/src/components/JourneyCard.jsx +36 -0
  38. frontend/src/components/JourneyCard.module.css +80 -0
  39. frontend/src/components/MCQOption.jsx +41 -0
  40. frontend/src/components/MCQOption.module.css +56 -0
  41. frontend/src/components/RedactedTextView.js +42 -0
  42. frontend/src/components/TextWithTooltips.jsx +47 -0
  43. frontend/src/data/constants.js +23 -0
  44. frontend/src/data/medicalTerms.js +32 -0
  45. frontend/src/icons/IconArticlePerson.jsx +38 -0
  46. frontend/src/icons/IconAstrophotography.jsx +31 -0
  47. frontend/src/icons/IconBackArrow.jsx +35 -0
  48. frontend/src/icons/IconClose.jsx +28 -0
  49. frontend/src/icons/IconCodeBlocks.jsx +37 -0
  50. frontend/src/icons/IconGemma.jsx +50 -0
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ backend/data/images/ filter=lfs diff=lfs merge=lfs -text
37
+ backend/data/images/1.png filter=lfs diff=lfs merge=lfs -text
38
+ backend/data/images/2.png filter=lfs diff=lfs merge=lfs -text
39
+ backend/data/who_chestxray_guideline_9241546778_eng.pdf filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ .env
2
+ .venv/
3
+ .idea/*
4
+ .DS_Store
5
+ /frontend/node_modules/
6
+ /frontend/package-lock.json
Dockerfile ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# --- Stage 1: Build the React Frontend ---
FROM node:20-slim AS frontend-builder
WORKDIR /app
# Add a build argument to force a rebuild (and not use cache) when new code is pushed
ARG CACHE_BUSTER=1
COPY frontend/package.json ./
RUN npm install
COPY frontend/ .
RUN npm run build

# --- Stage 2: Build the Final Production Image with Flask ---
FROM python:3.10-slim
ENV PYTHONUNBUFFERED=1
ENV CACHE_DIR=/data/cache

# Set the NLTK data path environment variable.
# This tells NLTK where to look for data for ALL users.
ENV NLTK_DATA=/usr/local/share/nltk_data

# Install system dependencies first, as they change less frequently.
# Clean the apt lists in the same layer so they don't bloat the image.
RUN apt-get update && \
    apt-get install -y unzip --no-install-recommends && \
    rm -rf /var/lib/apt/lists/*

RUN useradd -m -s /bin/bash -u 1000 user
WORKDIR /app

# Copy and install Python requirements from the backend folder
COPY --chown=user:user backend/requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Download the required NLTK data.
# This command downloads it to the directory specified by $NLTK_DATA.
RUN python -m nltk.downloader -d $NLTK_DATA punkt punkt_tab

# Copy the entire backend application code
COPY --chown=user:user backend/ .

# Copy the built frontend from the first stage into the correct directory
COPY --chown=user:user --from=frontend-builder /app/dist ./frontend/dist

# Create the cache directory and make it writable in a single layer
RUN mkdir -p $CACHE_DIR && chmod -R 777 $CACHE_DIR

# Define the path to your potential zip file
# (use the ENV key=value form; the space-separated form is deprecated)
ENV ZIP_FILE_PATH=./default_cache/rad-learn-cache.zip

# Conditionally unzip the file
RUN if [ -f "$ZIP_FILE_PATH" ]; then \
        unzip -o "$ZIP_FILE_PATH" -d $CACHE_DIR && \
        chmod -R 777 $CACHE_DIR && \
        rm "$ZIP_FILE_PATH"; \
    fi

# Writable application directories for RAG artifacts, owned by the app user
RUN mkdir -p /app/persistent_cache \
             /app/processed_figures_kb \
             /app/chroma_db_store && \
    chown user:user /app/persistent_cache \
                    /app/processed_figures_kb \
                    /app/chroma_db_store

# Switch to the non-root user for security
USER user

# Expose the port
EXPOSE 7860

# Run the production server
CMD ["gunicorn", \
     "--bind", "0.0.0.0:7860", \
     "--timeout", "600", \
     "--worker-class", "gthread", \
     "--workers", "1", \
     "--threads", "4", \
     "app:app"]
README.md CHANGED
@@ -1,12 +1,22 @@
1
  ---
2
- title: Rad Learning Companion
3
  emoji: 🏃
4
- colorFrom: red
5
- colorTo: red
6
  sdk: docker
7
  pinned: false
8
  license: apache-2.0
9
- short_description: Radiology Learning Companion Demo - built with MedGemma
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Radiology Learning Companion
3
  emoji: 🏃
4
+ colorFrom: indigo
5
+ colorTo: indigo
6
  sdk: docker
7
  pinned: false
8
  license: apache-2.0
9
+ short_description: A demo showcasing a medical learning experience of CXR image
10
  ---
11
 
12
+ **Radiology Learning Companion Demo Built with MedGemma**
13
+
14
+ Imagine a learning environment where interacting directly with a Chest X-Ray (CXR) image significantly boosts your understanding. That's precisely what the Radiology Learning Companion Demo offers. This web application is an interactive educational tool tailored for medical students to hone their radiological assessment skills for CXRs.
15
+
16
+ Radiology Learning Companion Demo demonstrates how to harness MedGemma's multimodal capabilities, combining medical image interpretation and robust medical reasoning. In this demo, users start by selecting an image from a library of two CXRs; developers can build their own library of images. The demo uses MedGemma's internal radiological assessment hypothesis and relevant clinical guidelines, and presents the user with a series of targeted multiple-choice questions.
17
+
18
+ After the user goes through their learning journey, Radiology Learning Companion Demo reveals its own interpretation, providing a clear rationale based on CXR findings and established guidelines. It then offers a comparative analysis against the user's responses, designed to deepen their understanding and validate their clinical observations.
19
+
20
+ You as a developer can use this approach to include other guidelines using RAG or other prompts and context to tailor and build such a learning companion.
21
+
22
+ *Note: This demo utilizes non-DICOM Chest X-Ray (CXR) images, each paired with a curated single condition label. Our labeling strategy prioritizes educationally relevant findings to power a focused and effective simulated learning experience for demo purpose only. This demonstration is solely for illustrative purposes and doesn't represent a finished or approved product. It does not comply with any harmonized regulations or standards for quality, safety, or efficacy. Any real-world application would require further development, training, and adaptation.*
backend/app.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2025 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ import logging
17
+ import os
18
+ import sys
19
+
20
+ from flask import Flask, send_from_directory
21
+
22
+ import case_util
23
+ import config
24
+ from llm_client import VertexAILLMClient
25
+ from llm_client import HuggingFaceLLMClient
26
+ from background_task_manager import BackgroundTaskManager
27
+ from cache_manager import CacheManager
28
+ from rag.knowledge_base import KnowledgeBase
29
+ from rag.model_manager import ModelManager
30
+ from rag.rag_context_engine import RAGContextEngine, format_context_messages_to_string
31
+ from routes import main_bp
32
+
33
+
34
def _get_llm_client():
    """Initializes the LLM client and handles exit on failure."""
    logger = logging.getLogger(__name__)

    location = config.MEDGEMMA_LOCATION
    if location == 'HUGGING_FACE':
        logger.info("HUGGING_FACE MedGemma end point initialized.")
        return HuggingFaceLLMClient(config.HF_TOKEN, config.MEDGEMMA_ENDPOINT_URL)
    if location == 'VERTEX_AI':
        logger.info("Vertex AI MedGemma end point initialized.")
        return VertexAILLMClient(config.GCLOUD_SA_KEY, config.MEDGEMMA_ENDPOINT_URL)

    # Any other (or missing) MEDGEMMA_LOCATION value is a fatal misconfiguration.
    logger.critical("LLM client failed to initialize. API calls will fail.")
    sys.exit("Exiting: LLM client initialization failed.")
47
+
48
def _initialize_rag_system(flask_app: Flask):
    """Checks for persistent cache and initializes the RAG system."""
    logger = logging.getLogger(__name__)
    rag_context_cache = {}

    # When caching is enabled and the persistent cache already has entries,
    # the expensive RAG build can be skipped entirely.
    if config.USE_CACHE:
        cache_manager = flask_app.config['DEMO_CACHE']
        if len(cache_manager.cache) > 0:
            logger.warning(f"The cache is not empty, so not initialising the RAG system.")
            return
        else:
            logger.info(f"The cache is empty, so resuming the RAG initialisation")

    try:
        logger.info("--- Initializing RAG System and pre-fetching context... ---")
        rag_model_manager = ModelManager()
        rag_models = rag_model_manager.load_models()
        if not rag_models.get("embedder"):
            raise RuntimeError("RAG embedder failed to load.")

        knowledge_base = KnowledgeBase(models=rag_models)
        knowledge_base.build(pdf_filepath=config.GUIDELINE_PDF_PATH)
        if not knowledge_base.retriever:
            raise RuntimeError("Failed to build the RAG retriever.")

        rag_engine = RAGContextEngine(knowledge_base=knowledge_base)

        # Pre-fetch guideline context for every case that has abnormal labels.
        for case_id, case_data in flask_app.config.get("AVAILABLE_REPORTS", {}).items():
            labels = case_data.ground_truth_labels
            if not labels:
                continue
            queries = [label.lower() for label in labels.keys()]
            if "normal" in queries:
                continue
            docs = rag_engine.retrieve_context_docs_for_simple_queries(queries)
            pages = {doc.metadata.get("page_number") for doc in docs if doc.metadata.get("page_number")}
            context_messages, _ = rag_engine.build_context_messages(docs)
            rag_context_cache[case_id] = {
                "context_string": format_context_messages_to_string(context_messages),
                "citations": sorted(pages),
            }

        logger.info("✅ RAG System ready.")
    except Exception as e:
        # RAG is essential for the demo; a partial startup would be worse
        # than failing fast here.
        logger.critical(f"FATAL: RAG System failed to initialize: {e}", exc_info=True)
        sys.exit("Exiting: RAG system initialization failed.")

    flask_app.config['RAG_CONTEXT_CACHE'] = rag_context_cache
93
+
94
+
95
def _initialize_demo_cache(flask_app: Flask):
    """Initializes the disk cache for MCQs and summary templates."""
    logger = logging.getLogger(__name__)

    # Guard clause: with caching off, expose an explicit None so callers
    # can distinguish "disabled" from "not yet configured".
    if not config.USE_CACHE:
        logger.warning("⚠️ Caching is DISABLED.")
        flask_app.config['DEMO_CACHE'] = None
        return

    cache_dir = os.getenv('CACHE_DIR', config.BASE_DIR / "persistent_cache")
    flask_app.config['DEMO_CACHE'] = CacheManager(cache_dir)
    logger.info("✅ Cache Setup Complete.")
106
+
107
+
108
def _register_routes(flask_app: Flask):
    """Registers blueprints and defines static file serving."""
    flask_app.register_blueprint(main_bp)

    @flask_app.route('/', defaults={'path': ''})
    @flask_app.route('/<path:path>')
    def serve(path):
        # Serve an existing static asset directly; every other path falls
        # through to the SPA entry point so client-side routing can handle it.
        candidate = os.path.join(flask_app.static_folder, path)
        if path and os.path.exists(candidate):
            return send_from_directory(flask_app.static_folder, path)
        return send_from_directory(flask_app.static_folder, 'index.html')
119
+
120
+
121
def create_app():
    """Creates and configures the Flask application by calling modular helper functions."""
    application = Flask(__name__, static_folder=config.STATIC_DIR)

    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - [%(name)s] - %(message)s')

    # Eager setup: LLM client, case manifest, and the demo cache.
    application.config["LLM_CLIENT"] = _get_llm_client()
    application.config["AVAILABLE_REPORTS"] = case_util.get_available_reports(config.MANIFEST_CSV_PATH)
    _initialize_demo_cache(application)

    # Deferred setup: RAG warm-up runs in the background so startup is not blocked.
    task_manager = BackgroundTaskManager()
    application.config['TASK_MANAGER'] = task_manager
    task_manager.start_task(key="rag_system", target_func=_initialize_rag_system, flask_app=application)

    _register_routes(application)
    return application
138
+
139
+
140
# Module-level WSGI entry point; gunicorn references this as "app:app".
app = create_app()

if __name__ == '__main__':
    # Development server only; production runs gunicorn (see Dockerfile CMD).
    app.run(host='0.0.0.0', port=7860, debug=True)
backend/background_task_manager.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2025 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import logging
16
+ import threading
17
+
18
logger = logging.getLogger(__name__)


class BackgroundTaskManager:
    """A simple manager to run and track background initialization tasks.

    Tasks run in daemon threads. Results and exceptions are recorded in
    dictionaries guarded by a single lock, so status checks are thread-safe.
    """

    def __init__(self):
        self.tasks = {}    # key -> Thread running (or having run) the task
        self.results = {}  # key -> return value of a successfully finished task
        self.errors = {}   # key -> exception raised by a failed task
        self._lock = threading.Lock()

    def _task_wrapper(self, key, target_func, *args, **kwargs):
        """A wrapper to run the target function and store its result or exception."""
        logger.info(f"Background task '{key}' started.")
        try:
            result = target_func(*args, **kwargs)
            with self._lock:
                self.results[key] = result
            logger.info(f"✅ Background task '{key}' finished successfully.")
        except Exception as e:
            with self._lock:
                self.errors[key] = e
            logger.critical(f"❌ Background task '{key}' failed with an exception.", exc_info=True)

    def start_task(self, key, target_func, *args, **kwargs):
        """Starts a new background task in a daemon thread.

        Duplicate keys are ignored with a warning. The membership check and
        registration happen under the lock so two concurrent callers cannot
        both start a task for the same key (the original checked unlocked).
        """
        with self._lock:
            if key in self.tasks:
                logger.warning(f"Task '{key}' is already running.")
                return
            thread = threading.Thread(
                target=self._task_wrapper,
                args=(key, target_func) + args,
                kwargs=kwargs,
                daemon=True  # Daemon threads exit when the main app exits
            )
            self.tasks[key] = thread
        # Start outside the lock; the wrapper itself acquires the lock.
        thread.start()

    def is_task_running(self, key):
        """Returns True if a specific task is still active.

        Always returns a bool (the original leaked None / the Thread object
        via short-circuit evaluation).
        """
        with self._lock:
            thread = self.tasks.get(key)
            return bool(thread and thread.is_alive())

    def is_task_done(self, key):
        """Checks if a task has completed (successfully or with an error)."""
        with self._lock:
            return key in self.results or key in self.errors

    def get_error(self, key):
        """Returns the exception for a failed task, if any."""
        with self._lock:
            return self.errors.get(key)
backend/cache_manager.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2025 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import logging
16
+ from dataclasses import asdict
17
+ from pathlib import Path
18
+
19
+ import diskcache as dc
20
+
21
+ from models import ClinicalMCQ, CaseSummary
22
+
23
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
class CacheManager:
    """
    Manages a persistent, on-disk cache for the demo using diskcache.
    This class is thread-safe and process-safe (diskcache handles locking).
    """

    def __init__(self, cache_directory: str | Path):
        # Keep the raw directory for logging/debugging; diskcache wants a str.
        self.cache_directory = cache_directory
        self.cache = dc.Cache(str(cache_directory))
        logger.info(f"✅ DemoCacheManager initialized. Cache directory: {cache_directory}")

    def get_all_mcqs_sequence(self, case_id: str) -> list[ClinicalMCQ]:
        """Retrieves the list of MCQs for a case.

        Returns an empty list on a cache miss. (The original annotation
        advertised `| None`, but None was never actually returned.)
        """
        mcq_list = self.cache.get(f"{case_id}_full_mcqs")
        if mcq_list is not None:
            # Entries are stored as plain dicts; rehydrate into dataclasses.
            return [ClinicalMCQ(**data) for data in mcq_list]
        return []

    def add_all_mcqs_to_case(self, case_id: str, all_mcqs: list[ClinicalMCQ]):
        """Set the list of MCQs to the given case in the cache."""
        with self.cache.transact():
            # Serialize dataclasses to dicts so the cache stores plain data.
            list_of_mcqs = [asdict(mcq) for mcq in all_mcqs]
            self.cache.set(f"{case_id}_full_mcqs", list_of_mcqs)
        logger.info(f"✅ Cache updated for case '{case_id}' with all MCQs.")

    def get_summary_template(self, case_id: str) -> CaseSummary | None:
        """Retrieves the summary template for a case.

        Returns None on a cache miss or when the cached payload cannot be
        deserialized back into a CaseSummary.
        """
        template_dict = self.cache.get(f"{case_id}_summary_template")
        if template_dict:
            try:
                # The rationale will be empty in the template
                return CaseSummary.from_dict(template_dict)
            except (TypeError, KeyError):
                logger.error("Deserialization of the cached summary template failed.")
                return None
        return None

    def save_summary_template(self, case_id: str, template: CaseSummary):
        """Saves a summary template to the cache."""
        self.cache.set(f"{case_id}_summary_template", asdict(template))
        logger.info(f"✅ Summary template saved for case '{case_id}'.")
backend/case_util.py ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2025 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import csv
16
+ import json
17
+ import logging
18
+ import random
19
+ import re
20
+ from dataclasses import replace
21
+ from pathlib import Path
22
+
23
+ from config import BASE_DIR, RANDOMIZE_CHOICES
24
+ from models import Case, CaseSummary, AnswerLog, ConversationTurn, QuestionOutcome, ClinicalMCQ
25
+
26
+ # --- Configuration ---
27
+ # Configure basic logging (optional, adjust as needed)
28
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
29
+ logger = logging.getLogger(__name__)
30
+
31
+
32
def fetch_report(report_path: Path):
    """Report file reading utility function.

    Loads the JSON ground-truth labels stored at `report_path`. Returns the
    parsed object on success, or "" (falsy) when the file is missing or does
    not contain valid JSON — callers test the result with `if not labels`.
    """
    logger = logging.getLogger(__name__)
    try:
        with open(report_path, 'r') as f:
            report = json.load(f)
        logger.info(f"Successfully loaded '{report_path}' into memory.")
        return report
    except FileNotFoundError:
        logger.error(f"ERROR: Could not find report file: {report_path}")
        return ""
    except json.JSONDecodeError as e:
        # A corrupt report should degrade the same way as a missing one
        # instead of crashing manifest loading (the original let this escape).
        logger.error(f"ERROR: Invalid JSON in report file {report_path}: {e}")
        return ""
42
+
43
+
44
def get_available_reports(reports_csv_path: Path):
    """Reads available reports as Cases for this demo.

    Parses the manifest CSV and returns a dict mapping case_id (str) -> Case.
    Rows with a missing report file or missing download URL are skipped with
    a warning; a missing/ill-formed manifest yields an empty dict.
    """
    available_reports: dict[str, Case] = {}
    if not reports_csv_path.is_file():
        logger.warning(f"Manifest CSV file not found at {reports_csv_path}. AVAILABLE_REPORTS will be empty.")
        return available_reports

    try:
        with open(reports_csv_path, mode='r', encoding='utf-8') as csvfile:
            reader = csv.DictReader(csvfile)
            required_headers = {'case_id', 'case_condition_name', 'report_path', 'download_image_url', 'findings'}
            # fieldnames is None for an empty file; treat that as "all headers missing"
            # instead of raising TypeError from issubset(None).
            fieldnames = set(reader.fieldnames or [])
            if not required_headers.issubset(fieldnames):
                logger.error(
                    f"CSV file {reports_csv_path} is missing one or more required headers: {required_headers - fieldnames}"
                )
                return available_reports

            for row in reader:
                case_id = row['case_id']
                condition_name = row['case_condition_name']
                report_path_from_csv = row['report_path']  # e.g., static/reports/report1.txt or empty
                download_image_url_from_csv = row['download_image_url']
                potential_findings = row['findings']

                # Paths in the CSV are relative to BASE_DIR; resolve before any I/O.
                abs_report_path = BASE_DIR / report_path_from_csv
                if not abs_report_path.is_file():
                    # BUG FIX: the original message said "Image file", but this checks the report file.
                    logger.warning(
                        f"Report file not found for case '{case_id}' at '{abs_report_path}'. Skipping this entry.")
                    continue

                if not download_image_url_from_csv:
                    logger.warning(
                        f"Download image url not found for case '{case_id}'. Skipping this entry.")
                    continue

                # BUG FIX: read via the resolved absolute path. The original passed the
                # CSV-relative path, which breaks whenever CWD != BASE_DIR even though
                # the existence check above used the absolute path.
                ground_truth_labels = fetch_report(abs_report_path)
                available_reports[str(case_id)] = Case(
                    id=case_id,
                    condition_name=condition_name,
                    ground_truth_labels=ground_truth_labels,
                    download_image_url=download_image_url_from_csv,
                    potential_findings=potential_findings,
                )
            logger.info(f"Loaded {len(available_reports)} report/image pairs from CSV.")

    except Exception as e:
        logger.error(f"Error reading or processing CSV file {reports_csv_path}: {e}", exc_info=True)
    return available_reports
92
+
93
+
94
def get_json_from_model_response(response_text: str) -> dict:
    """
    Robustly parses a JSON object from a response that may contain it
    within a markdown code block.

    Raises:
        ValueError: if no JSON object can be found or parsed. (ValueError is
        a subclass of Exception, so existing `except Exception` callers are
        unaffected; the original raised bare Exception.)
    """
    logger = logging.getLogger(__name__)

    # Prefer an explicit ```json fenced block, capturing {...} non-greedily.
    json_match = re.search(r"```json\s*(\{.*?\})\s*```", response_text, re.DOTALL)
    if json_match:
        json_str = json_match.group(1)
        try:
            return json.loads(json_str)
        except json.JSONDecodeError as e:
            logger.error(f"Failed to decode JSON after extraction: {e}")
            raise ValueError(f"Could not parse JSON from extracted block: {json_str}") from e

    # Fallback if the model misses the markdown block
    logger.warning("Could not find a ```json block. Falling back to raw search.")
    json_match_fallback = re.search(r"(\{.*\})", response_text, re.DOTALL)
    if json_match_fallback:
        try:
            return json.loads(json_match_fallback.group(1))
        except json.JSONDecodeError as e:
            # BUG FIX: the original let the raw JSONDecodeError escape here,
            # inconsistent with every other failure path of this function.
            raise ValueError(
                f"Could not find or parse JSON object in the API response: {response_text}") from e

    raise ValueError(f"Could not find or parse JSON object in the API response: {response_text}")
116
+
117
+
118
def get_potential_findings(case: Case) -> str:
    """Return the curated potential-findings text attached to a case."""
    return case.potential_findings
121
+
122
+
123
def build_summary_template(case: Case, rag_cache: dict) -> CaseSummary:
    """Builds summary template with static data like potential_findings, guideline_resources and condition."""
    # Pull pre-fetched RAG citations for this case; default to none.
    citations = rag_cache.get(case.id, {}).get("citations", [])
    citation_string = ', '.join(map(str, citations)) if citations else ""

    # Interpretation and rationale stay empty in the template; they are
    # filled in later from the user's journey.
    return CaseSummary(
        med_gemma_interpretation="",
        potential_findings=get_potential_findings(case),
        rationale=[],
        guideline_specific_resource=citation_string,
        condition=case.condition_name
    )
138
+
139
+
140
def populate_rationale(summary_template: CaseSummary, conversation_history: list[ConversationTurn]) -> CaseSummary:
    """Populates rationale and interpretation depending on user journey."""
    rationale_logs = []
    correct_count = 0

    for turn in conversation_history:
        mcq = turn.clinicalMcq
        answer_key = mcq.answer
        attempt1 = turn.userResponse.attempt1
        attempt2 = turn.userResponse.attempt2
        correct_answer_text = mcq.choices.get(answer_key, f"N/A - Model Answer Key '{answer_key}' not found.")

        # A turn counts as correct when either attempt matched the model's answer.
        if answer_key in (attempt1, attempt2):
            correct_count += 1
            outcome = QuestionOutcome(type="Correct", text=correct_answer_text)
        else:
            # Log whichever attempt the user actually ended on.
            chosen_key = attempt2 if attempt2 else attempt1
            outcome = QuestionOutcome(type="Incorrect", text=mcq.choices[chosen_key])

        rationale_logs.append(AnswerLog(question=mcq.question, outcomes=[outcome]))

    total_questions = len(conversation_history)
    accuracy = (correct_count / total_questions) * 100 if total_questions > 0 else 0

    if accuracy == 100:
        interpretation = f"Wonderful job! You achieved a perfect score of {accuracy:.0f}%, correctly identifying all key findings on your first attempt."
    elif accuracy >= 50:
        interpretation = f"Good job. You scored {accuracy:.0f}%, showing a solid understanding of the key findings for this case."
    else:
        interpretation = f"This was a challenging case, and you scored {accuracy:.0f}%. More preparation is needed. Review the rationale below for details."

    return CaseSummary(
        med_gemma_interpretation=interpretation,
        potential_findings=summary_template.potential_findings,
        rationale=rationale_logs,
        guideline_specific_resource=summary_template.guideline_specific_resource,
        condition=summary_template.condition,
    )
181
+
182
+
183
def randomize_mcqs(original_mcqs: list[ClinicalMCQ]) -> list[ClinicalMCQ]:
    """
    Takes a list of clinical MCQs and randomizes their answer choices.
    If an error occurs while randomizing a question, it returns the original question
    in its place and continues.
    """
    if not RANDOMIZE_CHOICES:
        return original_mcqs

    shuffled_mcqs = []
    for mcq in original_mcqs:
        try:
            # Remember the text of the correct option before shuffling.
            correct_text = mcq.choices[mcq.answer]

            # Shuffle only the option texts; the sorted keys stay fixed.
            option_texts = list(mcq.choices.values())
            random.shuffle(option_texts)
            new_choices = dict(zip(sorted(mcq.choices.keys()), option_texts))

            # The new answer key is whichever key now carries the correct text.
            new_answer = next(k for k, v in new_choices.items() if v == correct_text)

            # replace() yields an updated copy without mutating the original MCQ.
            shuffled_mcqs.append(replace(mcq, choices=new_choices, answer=new_answer))
        except Exception as e:
            # On any failure (e.g. a bad answer key) keep the question as-is.
            logger.warning(f"Warning: Could not randomize question '{mcq.id}'. Returning original. Error: {e}")
            shuffled_mcqs.append(mcq)

    return shuffled_mcqs
backend/config.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2025 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
import os
from pathlib import Path

# --- Model backend selection ----------------------------------------------
# POSSIBLE VALUES are HUGGING_FACE, VERTEX_AI
MEDGEMMA_LOCATION = os.environ.get("MEDGEMMA_LOCATION")
MEDGEMMA_ENDPOINT_URL = os.environ.get("MEDGEMMA_ENDPOINT_URL", None)

# --- Credentials (read from the environment; None when unset) -------------
GCLOUD_SA_KEY = os.environ.get("GCLOUD_SA_KEY", None)
HF_TOKEN = os.environ.get("HF_TOKEN", None)

# --- Feature flags: string env vars interpreted as booleans ---------------
_TRUTHY_VALUES = ('true', '1', 't')
USE_CACHE = os.getenv('USE_CACHE', 'true').lower() in _TRUTHY_VALUES
RANDOMIZE_CHOICES = os.getenv('RANDOMIZE_CHOICES', 'true').lower() in _TRUTHY_VALUES

# --- Filesystem layout (all paths anchored at this module's directory) ----
BASE_DIR = Path(__file__).parent.resolve()
# path to the built React app's 'dist' folder
STATIC_DIR = BASE_DIR / 'frontend' / 'dist'
MANIFEST_CSV_PATH = BASE_DIR / 'data' / 'reports_manifest.csv'
GUIDELINE_PDF_PATH = BASE_DIR / 'data' / 'who_chestxray_guideline_9241546778_eng.pdf'

# --- Quiz settings --------------------------------------------------------
MAX_NUMBER_OF_MCQ_QUESTIONS = 5
backend/data/reports/1.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ {"right pleural effusion": "yes"}
backend/data/reports/2.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ {"normal": "yes"}
backend/data/reports_manifest.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ case_id,image_type,case_condition_name,download_image_url,report_path,findings
2
+ 1,CXR,Pleural Effusion,https://huggingface.co/spaces/google/rad-learn-companion-samples/resolve/main/images/1.png,data/reports/1.txt,The findings on this Chest X-Ray are suggestive of Right-sided Pleural Effusion. This is indicated by the blunting of the right costophrenic angle and the presence of fluid in the right pleural space.
3
+ 2,CXR,No Abnormalities,https://huggingface.co/spaces/google/rad-learn-companion-samples/resolve/main/images/4.png,data/reports/2.txt,"Based on the image, it appears to be a Normal Chest X-Ray. The lungs appear clear with no obvious signs of consolidation, nodules, or masses. The heart size seems normal and there are no apparent mediastinal abnormalities."
backend/data/who_chestxray_guideline_9241546778_eng.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2620eea1f60558737f9467ebf17c659a695979a5c152d00c61fb3e25e80b278
3
+ size 7193815
backend/default_cache/README.md ADDED
@@ -0,0 +1 @@
 
 
1
+ Cache backup is stored here.
backend/default_cache/rad-learn-cache.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c71f42ca7294c27968449bdad1822161cc38f7f8f334523231d123d42f17826
3
+ size 7809
backend/llm_client.py ADDED
@@ -0,0 +1,291 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2025 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import json
16
+ import logging
17
+ import uuid
18
+
19
+ import requests
20
+
21
+ from case_util import get_json_from_model_response
22
+ from models import ClinicalMCQ
23
+ from prompts import mcq_prompt_all_questions_with_rag
24
+ from abc import ABC, abstractmethod
25
+ from google.oauth2 import service_account
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
class LLMClient(ABC):
    """Abstract base for chat-completion backends that turn a case into MCQs.

    Concrete subclasses implement `_make_chat_completion_request`; the prompt
    construction and response-to-ClinicalMCQ conversion live here.
    """

    _api_key = None
    _endpoint_url = None

    def generate_all_questions(self, case_data: dict, guideline_context: str) -> list[ClinicalMCQ] | None:
        """
        Orchestrates the prompt creation and live LLM call to generate the list of all MCQs.
        Receives pre-fetched RAG context as a string.
        Returns None if the call or parsing fails.
        """
        messages = self._create_prompt_messages_for_all_questions(
            image_url=case_data.get('download_image_url'),
            ground_truth_labels=case_data.get('ground_truth_labels', {}),
            guideline_context=guideline_context,
        )

        try:
            response_dict = self._make_chat_completion_request(
                model="tgi",  # Or your configured model
                messages=messages,
                temperature=0,
                max_tokens=8192,
            )

            # The parsed payload is expected to hold a "questions" list.
            question_dicts = response_dict.get("questions", [])
            if not question_dicts:
                raise ValueError("LLM response did not contain a 'questions' key or the list was empty.")

            mcqs = []
            for item in question_dicts:
                # A question object without a "question" field is malformed.
                if "question" not in item:
                    logger.warning("Skipping malformed question object in response.")
                    continue
                mcqs.append(ClinicalMCQ(
                    id=str(uuid.uuid4()),
                    question=item.get('question', ''),
                    choices=item.get('choices', {}),
                    hint=item.get('hint', ''),
                    answer=item.get('answer', ''),
                    rationale=item.get('rationale', ''),
                ))
            return mcqs

        except Exception as e:
            # Any failure (network, parsing, validation) is reported as None.
            logger.error(f"Failed to generate and parse learning module: {e}")
            return None

    @abstractmethod
    def _make_chat_completion_request(
        self,
        model: str,
        messages: list,
        temperature: float,
        max_tokens: int,
        top_p: float | None = None,
        seed: int | None = None,
        stop: list[str] | str | None = None,
        frequency_penalty: float | None = None,
        presence_penalty: float | None = None
    ) -> dict | None:
        """Perform one chat-completion call and return the parsed JSON payload."""
        pass

    def _create_prompt_messages_for_all_questions(self, image_url: str, ground_truth_labels: dict, guideline_context: str):
        """
        Creates the list of messages for the LLM prompt.
        The system message carries the (RAG-aware) instructions; the user
        message carries the image plus the case-specific data.
        """
        system_message = {
            "role": "system",
            "content": [
                {"type": "text", "text": mcq_prompt_all_questions_with_rag},
            ]
        }

        case_specific_text = (
            f"<significant_clinical_conditions>\n{json.dumps(ground_truth_labels, indent=2)}\n</significant_clinical_conditions>\n\n"
            f"<guideline_context>\n{guideline_context}\n</guideline_context>"
        )

        user_message = {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": image_url}},
                {"type": "text", "text": case_specific_text}
            ]
        }

        messages = [system_message, user_message]
        logger.info("Messages being sent:-\n{}".format(json.dumps(messages, indent=2)))
        return messages
129
+
130
class HuggingFaceLLMClient(LLMClient):
    """LLM client for a Hugging Face TGI endpoint using the OpenAI-compatible
    `/v1/chat/completions` streaming API."""

    def __init__(self, _api_key, _endpoint_url):
        """Store the bearer token and endpoint URL.

        Raises:
            ValueError: if either value is missing/empty.
        """
        if not _api_key:
            raise ValueError("No API key provided.")
        if not _endpoint_url:
            raise ValueError("No endpoint URL provided.")

        self._api_key = _api_key
        self._endpoint_url = _endpoint_url

    def _make_chat_completion_request(
            self,
            model: str,
            messages: list,
            temperature: float,
            max_tokens: int,
            top_p: float | None = None,
            seed: int | None = None,
            stop: list[str] | str | None = None,
            frequency_penalty: float | None = None,
            presence_penalty: float | None = None
    ) -> dict | None:
        """Stream a chat completion and return the parsed JSON payload.

        Concatenates the SSE delta chunks into one string and delegates JSON
        extraction to `get_json_from_model_response`.
        """
        headers = {
            "Authorization": f"Bearer {self._api_key}",
            "Content-Type": "application/json",
        }
        payload = {
            "model": model,
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
            "stream": True,
        }
        if top_p is not None: payload["top_p"] = top_p
        if seed is not None: payload["seed"] = seed
        if stop is not None: payload["stop"] = stop
        if frequency_penalty is not None: payload["frequency_penalty"] = frequency_penalty
        if presence_penalty is not None: payload["presence_penalty"] = presence_penalty

        # Normalize the endpoint so it always targets /v1/chat/completions.
        temp_url = self._endpoint_url.rstrip('/')
        if temp_url.endswith("/v1/chat/completions"):
            full_url = temp_url
        elif temp_url.endswith("/v1"):
            full_url = temp_url + "/chat/completions"
        else:
            full_url = temp_url + "/v1/chat/completions"

        # FIX: the payload requests streaming, but the original call omitted
        # stream=True, so `requests` buffered the entire response body before
        # iter_lines() ever ran. stream=True lets the SSE loop below consume
        # the response incrementally.
        response = requests.post(full_url, headers=headers, json=payload,
                                 timeout=60, stream=True)

        logger.info(f"LLM call status code: {response.status_code}, response: {response.reason}")
        # FIX: fail loudly on HTTP errors instead of silently parsing an error
        # body into an empty explanation; the caller catches and logs it.
        response.raise_for_status()

        explanation_parts = []
        for line in response.iter_lines():
            if line:
                decoded_line = line.decode('utf-8')
                if decoded_line.startswith('data: '):
                    json_data_str = decoded_line[len('data: '):].strip()
                    if json_data_str == "[DONE]":
                        break
                    try:
                        chunk = json.loads(json_data_str)
                        if chunk.get("choices") and chunk["choices"][0].get(
                                "delta") and chunk["choices"][0]["delta"].get(
                                "content"):
                            explanation_parts.append(
                                chunk["choices"][0]["delta"]["content"])
                    except json.JSONDecodeError:
                        logger.warning(
                            f"Could not decode JSON from stream chunk: {json_data_str}")
                        # Depending on API, might need to handle partial JSON or other errors
                elif decoded_line.strip() == "[DONE]":  # Some APIs might send [DONE] without "data: "
                    break

        explanation = "".join(explanation_parts).strip()
        if not explanation:
            logger.warning("Empty explanation from API")
        return get_json_from_model_response(explanation)
208
+
209
class VertexAILLMClient(LLMClient):
    """LLM client that calls a Vertex AI endpoint, authenticating with a
    service-account JSON key supplied via configuration."""

    def __init__(self, _api_key, _endpoint_url):
        """Store the service-account key (JSON string) and endpoint URL.

        Raises:
            ValueError: if either value is missing/empty.
        """
        if not _api_key:
            raise ValueError("No API key provided.")
        if not _endpoint_url:
            raise ValueError("No endpoint URL provided.")

        self._api_key = _api_key
        self._endpoint_url = _endpoint_url

    def _make_chat_completion_request(
            self,
            model: str,
            messages: list,
            temperature: float,
            max_tokens: int,
            top_p: float | None = None,
            seed: int | None = None,
            stop: list[str] | str | None = None,
            frequency_penalty: float | None = None,
            presence_penalty: float | None = None
    ) -> dict | None:
        """Make a non-streaming chat-completion request against Vertex AI.

        Note: top_p/seed/stop/frequency_penalty/presence_penalty are accepted
        for interface compatibility but are not forwarded to the endpoint.
        """
        # 1. Get credentials directly from the secret
        creds = self._get_credentials_from_secret()
        logger.info("Successfully loaded credentials from secret.")

        # 2. Get a valid access token
        token = self._get_access_token(creds)
        logger.info("Successfully obtained access token.")

        # 3. Use the token to make an authenticated API call
        headers = {
            'Authorization': f'Bearer {token}',
            'Content-Type': 'application/json'
        }

        payload = {
            "model": model,
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
        }

        response = requests.post(self._endpoint_url, headers=headers, json=payload,
                                 timeout=60)

        logger.info(f"LLM call status code: {response.status_code}, status reason: {response.reason}")
        # FIX: surface HTTP errors explicitly instead of failing later with a
        # confusing KeyError/JSONDecodeError while parsing an error body.
        response.raise_for_status()

        response_dict = response.json()
        final_response = response_dict["choices"][0]["message"]["content"]
        return get_json_from_model_response(final_response)

    def _get_credentials_from_secret(self):
        """Loads Google Cloud credentials from an environment variable."""
        if not self._api_key:
            # FIX: was an f-string with no placeholders.
            raise ValueError(
                "Environment variable 'GCLOUD_SA_KEY' not found. Please set it in your Hugging Face Space secrets.")
        logger.info("Loading Google Cloud credentials...")
        # Parse the JSON string into a dictionary
        credentials_info = json.loads(self._api_key)

        logger.info("Google Cloud credentials loaded.")
        # Define the required scopes for the API you want to access
        scopes = ['https://www.googleapis.com/auth/cloud-platform']

        # Create credentials from the dictionary
        credentials = service_account.Credentials.from_service_account_info(
            credentials_info,
            scopes=scopes
        )

        return credentials

    def _get_access_token(self, credentials):
        """Refreshes the credentials to get a valid access token."""
        # Imported lazily so this optional dependency is only required when
        # the Vertex AI backend is actually used.
        from google.auth.transport.requests import Request

        # Refresh the token to ensure it's not expired
        credentials.refresh(Request())
        return credentials.token
backend/models.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2025 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from dataclasses import dataclass
16
+ from typing import Any
17
+
18
+
19
@dataclass
class ClinicalMCQ:
    """A single multiple-choice question generated for a teaching case."""
    id: str  # unique identifier (UUID string assigned at creation)
    question: str  # the question text shown to the learner
    choices: dict[str, str]  # option key (e.g. "A") -> choice text
    hint: str  # short hint shown on request
    answer: str  # key into `choices` identifying the correct option
    rationale: str  # explanation of why the answer is correct
27
+
28
+
29
@dataclass
class Case:
    """A radiology teaching case, as loaded from the reports manifest."""
    id: str  # case identifier (manifest `case_id`)
    condition_name: str  # human-readable condition label
    ground_truth_labels: dict[str, str]  # condition -> label, e.g. {"normal": "yes"}
    download_image_url: str  # URL of the case's X-ray image
    potential_findings: str  # free-text findings description
36
+
37
+
38
#### For Summary ####
@dataclass
class UserResponse:
    """Represents the user's attempts for a single question."""
    attempt1: str  # key of the first choice the user picked
    attempt2: str | None  # key of the second attempt, if one was made
44
+
45
+
46
@dataclass
class ConversationTurn:
    """Pairs one generated MCQ with the user's recorded attempts at it."""
    clinicalMcq: ClinicalMCQ
    userResponse: UserResponse

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "ConversationTurn":
        """
        A factory method to create a ConversationTurn instance from a dictionary.
        This handles the nested instantiation of the other dataclasses.
        Missing keys or wrong shapes raise KeyError/TypeError, which doubles
        as structural validation of the input.
        """
        mcq = ClinicalMCQ(**data['ModelResponse'])
        response = UserResponse(**data['UserResponse'])
        return cls(clinicalMcq=mcq, userResponse=response)
66
+
67
+
68
@dataclass
class QuestionOutcome:
    """Represents a single outcome line for a question."""
    type: str  # "Correct" or "Incorrect"
    text: str  # The actual answer text
73
+
74
+
75
@dataclass
class AnswerLog:
    """A log detailing the user's performance on a single question for the rationale,
    now including explicit correct and user's chosen (if incorrect) answers."""
    question: str
    outcomes: list[QuestionOutcome]  # one QuestionOutcome per outcome line

    @classmethod
    def from_dict(cls, data: dict) -> "AnswerLog":
        """Rebuild an AnswerLog (with nested QuestionOutcome objects) from its dict form."""
        return cls(
            question=data['question'],
            outcomes=[QuestionOutcome(**entry) for entry in data['outcomes']],
        )
87
+
88
+
89
@dataclass
class CaseSummary:
    """Represents the final, structured summary with the new fields."""
    med_gemma_interpretation: str  # model's overall reading of the case
    rationale: list[AnswerLog]  # per-question performance logs
    potential_findings: str
    guideline_specific_resource: str
    condition: str

    @classmethod
    def from_dict(cls, data: dict) -> "CaseSummary":
        """Rebuild a CaseSummary, delegating nested entries to AnswerLog.from_dict."""
        return cls(
            med_gemma_interpretation=data['med_gemma_interpretation'],
            rationale=[AnswerLog.from_dict(item) for item in data['rationale']],
            potential_findings=data['potential_findings'],
            guideline_specific_resource=data['guideline_specific_resource'],
            condition=data['condition'],
        )
backend/prompts.py ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2025 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ # --- PROMPT FOR WHEN RAG IS DISABLED ---
16
+
17
+ mcq_prompt_all_questions_with_rag = """
18
+ You are a distinguished medical professor and an expert in radiological interpretation. You are designing a learning experience for medical students. Your signature teaching style is based on the Socratic method: you guide students from basic visual evidence to a final conclusion without giving away the answer prematurely.
19
+
20
+ ### Your Pedagogical Mandate
21
+ Your entire goal is to teach a **process of visual analysis**, not just to test final knowledge. You will create a learning module that forces the student to build a case from the ground up.
22
+
23
+ 1. **Observation First, Interpretation Last:** This is the core of your method. The student must first learn to SEE. Your questions will guide their eyes to specific findings on the image.
24
+ 2. **Purposeful Rationales:** Your rationales must also follow this principle.
25
+ * For **observational questions (Q1-4)**, the `rationale` must explain the **radiological principle** of the finding (e.g., "the border is obscured due to loss of silhouette against an adjacent fluid-density opacity"), not the `<significant_clinical_conditions>` it represents.
26
+ * For the **final diagnostic question (Q5)**, the `rationale` can and should explain how the signs point to the specific pathology.
27
+ 3. Since Chest X-Ray alone is not enough for concluding diagnosis, instead of using the term "diagnosis" use terms like "finding", "clinical condition", "clinical abnormality", etc.
28
+
29
+ ### Primary Task
30
+ Your output will be a single, valid JSON object wrapped in a markdown code block (```json ... ```).
31
+
32
+ ---
33
+ ### INPUT STRUCTURE FORMAT
34
+ You will be provided with the following inputs wrapped in XML-like tags:
35
+
36
+ 1. **`<chest_x_ray_image>` (Image):** The uploaded Chest X-Ray image that the entire learning module must be based on. Remember, a frontal CXR image will show the right hemithorax on the left side of the image and the left hemithorax on the right side of the image.
37
+ 2. **`<significant_clinical_conditions>` (JSON Object):** Your secret "Answer Key" containing the definitive clinical findings. This is for your guidance ONLY.
38
+ 3. **`<guideline_context>` (Text Block):** Retrieved knowledge from a clinical guideline. This is to be used ONLY for generating the `rationale` and `hint`.
39
+
40
+ ---
41
+ ### OUTPUT JSON STRUCTURE DEFINITION
42
+ You MUST generate a JSON object with the following top-level keys: `reasoning_steps` and `questions`.
43
+
44
+ 1. **`reasoning_steps` (Object):** This is your internal lesson plan.
45
+ * `final_clinical_conditions` (String): The conditions from `<significant_clinical_conditions>`.
46
+ * `observation_pathway` (Array of Strings): An array of exactly 5 strings outlining the Socratic path, specific to the image and including laterality.
47
+
48
+ 2. **`questions` (Array of Objects):** An array of 5 question objects that execute your lesson plan.
49
+ * Each object must have the keys: `question`, `choices` (an object with A,B,C,D), `answer`, `rationale`, `hint`.
50
+
51
+ ---
52
+ ### CONTENT & LOGIC RULES
53
+ 1. **Instruction for observation pathways:**
54
+ * **Core Instruction:** An array of exactly 5 strings outlining the Socratic path, specific to the image.
55
+
56
+ * ** When no abnormalities are present, the pathway must confirm the normalcy of key anatomical structures in a logical order (e.g., assess technical quality, then cardiac silhouette, then lung fields, then costophrenic angles).
57
+ * **Be Firm on Laterality:** The `observation_pathway` and `questions` must be specific to the side (left/right) shown in the image, using the 'L' or 'R' marker in the image as a definitive cue.
58
+ * **Include helpful observations to reduce repetition:** You can also add observation pathways based on visual observations which could help rule out other common clinical conditions.
59
+ * **Avoid Absolute Measurements Observations:** Since the CXR is not to scale, do not generate observation pathways which requires absolute measurements. Example: Size in cm for the width of the mediastinum. Diameter of the heart in cm.
60
+
61
+
62
+ 2. **Question Generation via Mapping:**
63
+ * ** Core Instruction:** The 5 questions you generate MUST correspond directly and in order to the 5 steps in your `observation_pathway`.
64
+ * **Plausible Distractor Answer Choices:** For Q1-4, choice distractors MUST be other plausible but incorrect radiological signs. For Q5, distractors MUST be relevant differential diagnoses for the visual finding (e.g., other conditions that can look similar on the film).
65
+ * **No Information Leakage (Q1-4):** The diagnostic terms from `<final_clinical_conditions>` MUST NOT appear in the `question`, `choices`, `rationale`, or `hint` for the first four questions.
66
+ * **Guideline Usage:** Use the relevant parts of `<guideline_context>` ONLY to generate the `rationale` and `hint`, and not the question text itself. Do not include the `<final_clinical_conditions>` in the the rationale or the hint.
67
+ * **Conciseness:** The `rationale` and `hint` strings MUST NOT exceed 30 words.
68
+ * **Relevance to X-Ray Image:** The questions **must** be relevant to the X-Ray image provided.
69
+ * **5th Question Instructions:** Ask the student to **synthesize the different observations** made earlier and provide a list of options consisting of the expected clinical condition along with 3 other viable options. This should be done even if the X-Ray image is normal.
70
+ ---
71
+ ### COMPLETE EXAMPLE (Demonstrating All Rules)
72
+
73
+ **LIVE INPUT:**
74
+ <significant_clinical_conditions>
75
+ {"left middle lobe pneumonia": "yes"}
76
+ </significant_clinical_conditions>
77
+ <guideline_context>
78
+ Pneumonia is an inflammatory condition of the lung primarily affecting the small air sacs (alveoli). On a chest X-ray, look for areas of consolidation, which appear as ill-defined increased opacities (whiteness), sometimes with air bronchograms (dark, branching airways visible within the white consolidation).
79
+ </guideline_context>
80
+
81
+ **OUTPUT:**
82
+ ```json
83
+ {
84
+ "reasoning_steps": {
85
+ "final_clinical_conditions": "Left Middle Lobe Pneumonia",
86
+ "observation_pathway": [
87
+ "Assess the overall technical quality and patient positioning of the radiograph.",
88
+ "Identify areas of increased opacity (whiteness) within the lung fields.",
89
+ "Localize the increased opacity to a specific lobe, paying attention to the borders and effacement of normal structures.",
90
+ "Look for associated signs such as air bronchograms or volume loss.",
91
+ "Synthesize the evidence to determine the final findings."
92
+ ]
93
+ },
94
+ "questions": [
95
+ {
96
+ "question": "Which of the following best describes the technical quality of this radiograph?",
97
+ "choices": {
98
+ "A": "Significant patient rotation is present.",
99
+ "B": "Adequate inspiration and penetration",
100
+ "C": "The image is significantly under-penetrated.",
101
+ "D": "It is an AP supine view, not a PA upright view."
102
+ },
103
+ "answer": "B",
104
+ "rationale": "The film shows clear lung markings where present and adequate visibility of the thoracic spine, indicating proper exposure.",
105
+ "hint": "Assess if you can see the vertebrae behind the heart and count the posterior ribs visible above the diaphragm."
106
+ },
107
+ {
108
+ "question": "What change in opacity is noted in the left mid-lung zone?",
109
+ "choices": {
110
+ "A": "It is significantly more lucent (blacker).",
111
+ "B": "There is a discrete, well-circumscribed nodule.",
112
+ "C": "There is an ill-defined area of increased opacity.",
113
+ "D": "No significant change in opacity is visible."
114
+ },
115
+ "answer": "C",
116
+ "rationale": "Increased opacity suggests consolidation, which is a key finding in certain lung conditions.",
117
+ "hint": "Focus on the general whiteness or grayness of the lung parenchyma compared to normal lung."
118
+ },
119
+ {
120
+ "question": "Which of the following describes the appearance of the left heart border?",
121
+ "choices": {
122
+ "A": "It is sharply demarcated.",
123
+ "B": "It is completely obscured or silhouetted.",
124
+ "C": "It is displaced laterally.",
125
+ "D": "It is less prominent than usual."
126
+ },
127
+ "answer": "B",
128
+ "rationale": "Loss of definition of a normal anatomical border (silhouette sign) suggests an abnormality in the adjacent lung segment.",
129
+ "hint": "Observe if the outline of the left side of the heart is clearly visible or if it blends into the surrounding opacity."
130
+ },
131
+ {
132
+ "question": "Are there any visible air bronchograms within the area of increased opacity?",
133
+ "choices": {
134
+ "A": "Yes, lucent branching structures are seen within the opacity.",
135
+ "B": "No, the opacity is uniformly dense.",
136
+ "C": "Only fluid levels are visible.",
137
+ "D": "The opacity is too faint to assess for air bronchograms."
138
+ },
139
+ "answer": "A",
140
+ "rationale": "Air bronchograms indicate that the airspaces are filled with fluid or exudate, but the bronchi remain patent, a classic sign of consolidation.",
141
+ "hint": "Look for dark, branching, tubular structures against the background of the white consolidation."
142
+ },
143
+ {
144
+ "question": "Synthesizing the observations of increased opacity in the left mid-lung zone, obscuration of the left heart border, and presence of air bronchograms, what is the most likely finding?",
145
+ "choices": {
146
+ "A": "Left-sided pleural effusion",
147
+ "B": "Left Middle Lobe Pneumonia",
148
+ "C": "Left upper lobe collapse",
149
+ "D": "Left lower lobe atelectasis"
150
+ },
151
+ "answer": "B",
152
+ "rationale": "The combination of consolidation in the left mid-lung zone, silhouetting of the left heart border (due to involvement of the left middle lobe), and air bronchograms is highly characteristic of pneumonia affecting the left middle lobe.",
153
+ "hint": "The 'silhouette sign' is crucial for localizing the pathology."
154
+ }
155
+ ]
156
+ }
157
+ ```
158
+
159
+ ---
160
+ ### LIVE TASK
161
+ Now, apply your expert Socratic teaching method. Generate a single JSON object for the following live inputs, strictly adhering to all structure, content, and logic rules defined above.
162
+
163
+ **LIVE INPUT:**
164
+ <chest_x_ray_image>
165
+ """
backend/rag/__init__.py ADDED
File without changes
backend/rag/knowledge_base.py ADDED
@@ -0,0 +1,568 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2025 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import concurrent.futures
16
+ import logging
17
+ import os
18
+ import re
19
+ from pathlib import Path
20
+ from typing import Dict, List
21
+
22
+ import fitz # PyMuPDF
23
+ from PIL import Image
24
+ from langchain.docstore.document import Document as LangchainDocument
25
+ from langchain.retrievers import BM25Retriever, EnsembleRetriever
26
+ from langchain.text_splitter import NLTKTextSplitter
27
+ from langchain_community.vectorstores import Chroma
28
+ from tqdm import tqdm
29
+
30
+ logger = logging.getLogger(__name__)
31
+
32
+ IMAGE_SUMMARY_PROMPT = """Summarize key findings in this image."""
33
+
34
+
35
+ class KnowledgeBase:
36
+ """Processes a source PDF and builds a self-contained, searchable RAG knowledge base."""
37
+
38
+ def __init__(self, models: dict, config_overrides: dict | None = None):
39
+ """Initializes the builder with necessary models and configuration."""
40
+ self.embedder = models.get("embedder")
41
+ self.ner_pipeline = models.get("ner_pipeline")
42
+
43
+ # Set default config and apply any overrides
44
+ self.config = self._get_default_config()
45
+ if config_overrides:
46
+ self.config.update(config_overrides)
47
+
48
+ # For consistent chunking, the RAG query uses the same enriching and chunking logic as the knowledge base.
49
+ self.document_enricher = self._enrich_documents
50
+ self.chunker = self._create_chunks_from_documents
51
+ self.retriever: EnsembleRetriever | None = None
52
+ self.page_map: Dict[int, Dict] = {}
53
+ self.source_filepath = ""
54
+
55
+ # Create necessary directories from config
56
+ Path(self.config["IMAGE_DIR"]).mkdir(parents=True, exist_ok=True)
57
+ Path(self.config["CHROMA_PERSIST_DIR"]).mkdir(parents=True, exist_ok=True)
58
+
59
+ def _get_default_config(self):
60
+ """Returns the default configuration for the KnowledgeBase."""
61
+ return {
62
+ "IMAGE_DIR": Path("processed_figures_kb/"),
63
+ "CHROMA_PERSIST_DIR": Path("chroma_db_store/"),
64
+ "MEDICAL_ENTITY_TYPES_TO_EXTRACT": ["PROBLEM"],
65
+ "EXTRACT_IMAGE_SUMMARIES": False, # Disabled as we don't load the LLM here
66
+ "FILTER_FIRST_PAGES": 6,
67
+ "FIGURE_MIN_WIDTH": 30,
68
+ "FIGURE_MIN_HEIGHT": 30,
69
+ "SENTENCE_CHUNK_SIZE": 250,
70
+ "CHUNK_FILTER_SIZE": 20,
71
+ "RETRIEVER_TOP_K": 20,
72
+ "ENSEMBLE_WEIGHTS_BM25,SENTENCE,NER": [0.2, 0.3, 0.5],
73
+ "SENTENCE_SCORE_THRESHOLD": 0.6,
74
+ "NER_SCORE_THRESHOLD": 0.5,
75
+ "MAX_PARALLEL_WORKERS": 16,
76
+ }
77
+
78
+ def build(self, pdf_filepath: str):
79
+ """The main public method to build the knowledge base from a PDF."""
80
+ logger.info(f"--------- Building Knowledge Base from '{pdf_filepath}' ---------")
81
+ pdf_path = Path(pdf_filepath)
82
+ if not pdf_path.exists():
83
+ logger.error(f"ERROR: PDF file not found at {pdf_filepath}")
84
+ return None
85
+
86
+ self.source_filepath = pdf_path
87
+
88
+ # Step 1: Process the PDF and build the structured page_map.
89
+ self.page_map = self._process_and_structure_pdf(pdf_path)
90
+ all_docs = [
91
+ doc for page_data in self.page_map.values() for doc in page_data["blocks"]
92
+ ]
93
+
94
+ # Step 2: Enrich documents with NER metadata.
95
+ enriched_docs = self._enrich_documents(all_docs, self.config.get("EXTRACT_IMAGE_SUMMARIES", False))
96
+
97
+ # Step 3: Chunk the enriched documents into final searchable units.
98
+ final_chunks = self._create_chunks_from_documents(enriched_docs)
99
+
100
+ # Step 4: Build the final ensemble retriever.
101
+ self.retriever = self._build_ensemble_retriever(final_chunks)
102
+
103
+ if self.retriever:
104
+ logger.info(f"--------- Knowledge Base Built Successfully ---------")
105
+ else:
106
+ logger.error(f"--------- Knowledge Base Building Failed ---------")
107
+
108
+ return self
109
+
110
    # --- Step 1: PDF Content Extraction ---
    def _process_and_structure_pdf(self, pdf_path: Path) -> dict:
        """Processes a PDF in parallel and directly builds the final page_map.

        This version is more efficient by opening the PDF only once.

        Returns a dict mapping 1-based page number -> {"chapter_id": int,
        "blocks": list[LangchainDocument]}, sorted by page number, or an
        empty dict on any failure.
        """
        logger.info("Step 1: Processing PDF and building structured page map...")
        page_map = {}

        try:
            # Improvement: Open the PDF ONCE to get all preliminary info.
            # The raw bytes are captured so each worker thread can reopen its
            # own independent fitz handle (fitz documents are not thread-safe).
            with fitz.open(pdf_path) as doc:
                pdf_bytes_buffer = doc.write()
                page_count = len(doc)
                toc = doc.get_toc()

            # Improvement: Create a more robust chapter lookup map.
            # Each 1-based page number maps to the index of its level-1 TOC
            # entry; the last chapter runs through the end of the document.
            page_to_chapter_id = {}
            if toc:
                chapters = [item for item in toc if item[0] == 1]
                for i, (lvl, title, start_page) in enumerate(chapters):
                    end_page = (
                        chapters[i + 1][2] - 1 if i + 1 < len(chapters) else page_count
                    )
                    for page_num in range(start_page, end_page + 1):
                        page_to_chapter_id[page_num] = i

            # Create tasks for the thread pool (using a tuple as requested).
            # Front-matter pages (the first FILTER_FIRST_PAGES) are skipped.
            tasks = [
                (
                    pdf_bytes_buffer,
                    i,
                    self.config,
                    pdf_path.name,
                    page_to_chapter_id,
                )
                for i in range(self.config["FILTER_FIRST_PAGES"], page_count)
            ]

            # Parallel Processing: cap workers at both the configured maximum
            # and the machine's CPU count.
            num_workers = min(
                self.config["MAX_PARALLEL_WORKERS"], os.cpu_count() or 1
            )
            with concurrent.futures.ThreadPoolExecutor(
                max_workers=num_workers
            ) as executor:
                futures = [
                    executor.submit(self.process_single_page, task) for task in tasks
                ]
                progress_bar = tqdm(
                    concurrent.futures.as_completed(futures),
                    total=len(tasks),
                    desc="Processing & Structuring Pages",
                )
                for future in progress_bar:
                    result = future.result()
                    if result:
                        # The worker returns a fully formed dictionary for the page_map
                        page_map[result["page_num"]] = result["content"]

        except Exception as e:
            logger.error(f"❌ Failed to process PDF {pdf_path.name}: {e}")
            return {}

        logger.info(f"✅ PDF processed. Created a map of {len(page_map)} pages.")
        # Completed pages arrive out of order; return them sorted by page number.
        return dict(sorted(page_map.items()))
176
+
177
+ # --- Step 2: Document Enrichment ---
178
+ def _enrich_documents(
179
+ self, docs: List[LangchainDocument], summarize: bool = False
180
+ ) -> List[LangchainDocument]:
181
+ """Enriches a list of documents with NER metadata and image summaries."""
182
+ logger.info("\nStep 2: Enriching documents...")
183
+ # NER Enrichment
184
+ if self.ner_pipeline:
185
+ logger.info("Adding NER metadata...")
186
+ for doc in tqdm(docs, desc="Enriching with NER"):
187
+ # 1. Skip documents that have no actual text content
188
+ if not doc.page_content or not doc.page_content.strip():
189
+ continue
190
+
191
+ try:
192
+ # 2. Process ONLY the text of the current document
193
+ processed_doc = self.ner_pipeline(doc.page_content)
194
+
195
+ # 3. Extract entities from the result. This result now
196
+ # unambiguously belongs to the current 'doc'.
197
+ entities = [
198
+ ent.text
199
+ for ent in processed_doc.ents
200
+ if ent.type in self.config["MEDICAL_ENTITY_TYPES_TO_EXTRACT"]
201
+ ]
202
+
203
+ # 4. Assign the correctly mapped entities to the document's metadata
204
+ if entities:
205
+ # Using set() handles duplicates before sorting and joining
206
+ unique_entities = sorted(list(set(entities)))
207
+ doc.metadata["block_ner_entities"] = ", ".join(unique_entities)
208
+
209
+ except Exception as e:
210
+ # Add error handling for robustness in case a single block fails
211
+ logger.warning(
212
+ f"\nWarning: Could not process NER for a block on page {doc.metadata.get('page_number', 'N/A')}: {e}")
213
+
214
+ # Image Summary Enrichment
215
+ if summarize:
216
+ logger.info("Generating image summaries...")
217
+ docs_with_figures = [
218
+ doc for doc in docs if "linked_figure_path" in doc.metadata
219
+ ]
220
+ for doc in tqdm(docs_with_figures, desc="Summarizing Images"):
221
+ try:
222
+ img = Image.open(doc.metadata["linked_figure_path"]).convert("RGB")
223
+ summary = self._summarize_image(img)
224
+ if summary:
225
+ doc.metadata["image_summary"] = summary
226
+ except Exception as e:
227
+ logger.warning(
228
+ "Warning: Could not summarize image"
229
+ f" {doc.metadata.get('linked_figure_path', '')}: {e}"
230
+ )
231
+ return docs
232
+
233
+ def _summarize_image(self, pil_image: Image.Image) -> str:
234
+ """Helper method to call the LLM for image summarization."""
235
+ if not self.llm_pipeline:
236
+ return ""
237
+ messages = [{
238
+ "role": "user",
239
+ "content": [
240
+ {"type": "text", "text": IMAGE_SUMMARY_PROMPT},
241
+ {"type": "image", "image": pil_image},
242
+ ],
243
+ }]
244
+ try:
245
+ output = self.llm_pipeline(text=messages, max_new_tokens=150)
246
+ return output[0]["generated_text"][-1]["content"].strip()
247
+ except Exception:
248
+ return ""
249
+
250
+ # --- Step 3: Document Chunking ---
251
+ def _create_chunks_from_documents(
252
+ self, enriched_docs: List[LangchainDocument], display_results: bool = True
253
+ ) -> List[LangchainDocument]:
254
+ """Takes enriched documents and creates the final list of chunks for indexing.
255
+
256
+ This method now has a single responsibility: chunking.
257
+ """
258
+ if display_results:
259
+ logger.info("\nStep 3: Creating final chunks...")
260
+
261
+ # Sentence Splitting
262
+ if display_results:
263
+ logger.info("Applying NLTK Sentence Splitting...")
264
+ splitter = NLTKTextSplitter(chunk_size=self.config["SENTENCE_CHUNK_SIZE"])
265
+ sentence_chunks = splitter.split_documents(enriched_docs)
266
+ if display_results:
267
+ logger.info(f"Generated {len(sentence_chunks)} sentence-level chunks.")
268
+
269
+ # NER Entity Chunking (based on previously enriched metadata)
270
+ if display_results:
271
+ logger.info("Creating NER Entity Chunks...")
272
+ ner_entity_chunks = [
273
+ LangchainDocument(
274
+ page_content=entity,
275
+ metadata={**doc.metadata, "chunk_type": "ner_entity_standalone"},
276
+ )
277
+ for doc in enriched_docs
278
+ if (entities_str := doc.metadata.get("block_ner_entities"))
279
+ for entity in entities_str.split(", ")
280
+ if entity
281
+ ]
282
+ if display_results:
283
+ logger.info(f"Added {len(ner_entity_chunks)} NER entity chunks.")
284
+
285
+ all_chunks = sentence_chunks + ner_entity_chunks
286
+ return [chunk for chunk in all_chunks if chunk.page_content]
287
+
288
+ # --- Step 4: Retriever Building ---
289
+ def _build_ensemble_retriever(
290
+ self, chunks: List[LangchainDocument]
291
+ ) -> EnsembleRetriever | None:
292
+ """Builds the final ensemble retriever from the chunks.
293
+
294
+ This method was already well-focused.
295
+ """
296
+ if not chunks:
297
+ logger.error("No chunks to build retriever from.")
298
+ return None
299
+ logger.info("\nStep 4: Building specialized retrievers...")
300
+ sentence_chunks = [
301
+ doc
302
+ for doc in chunks
303
+ if doc.metadata.get("chunk_type") != "ner_entity_standalone"
304
+ ]
305
+ ner_chunks = [
306
+ doc
307
+ for doc in chunks
308
+ if doc.metadata.get("chunk_type") == "ner_entity_standalone"
309
+ ]
310
+ retrievers, weights = [], []
311
+
312
+ if sentence_chunks:
313
+ bm25_retriever = BM25Retriever.from_documents(sentence_chunks)
314
+ bm25_retriever.k = self.config["RETRIEVER_TOP_K"]
315
+ retrievers.append(bm25_retriever)
316
+ weights.append(self.config["ENSEMBLE_WEIGHTS_BM25,SENTENCE,NER"][0])
317
+ sentence_vs = Chroma.from_documents(
318
+ documents=sentence_chunks,
319
+ embedding=self.embedder,
320
+ persist_directory=str(
321
+ self.config["CHROMA_PERSIST_DIR"] / "sentences"
322
+ ),
323
+ )
324
+ vector_retriever = sentence_vs.as_retriever(
325
+ search_type="similarity_score_threshold",
326
+ search_kwargs={
327
+ "k": self.config["RETRIEVER_TOP_K"],
328
+ "score_threshold": self.config["SENTENCE_SCORE_THRESHOLD"],
329
+ },
330
+ )
331
+ retrievers.append(vector_retriever)
332
+ weights.append(self.config["ENSEMBLE_WEIGHTS_BM25,SENTENCE,NER"][1])
333
+
334
+ if ner_chunks:
335
+ ner_vs = Chroma.from_documents(
336
+ documents=ner_chunks,
337
+ embedding=self.embedder,
338
+ persist_directory=str(self.config["CHROMA_PERSIST_DIR"] / "entities"),
339
+ )
340
+ ner_retriever = ner_vs.as_retriever(
341
+ search_type="similarity_score_threshold",
342
+ search_kwargs={
343
+ "k": self.config["RETRIEVER_TOP_K"],
344
+ "score_threshold": self.config["NER_SCORE_THRESHOLD"],
345
+ },
346
+ )
347
+ retrievers.append(ner_retriever)
348
+ weights.append(self.config["ENSEMBLE_WEIGHTS_BM25,SENTENCE,NER"][2])
349
+
350
+ if not retrievers:
351
+ logger.error("⚠️ Could not create any retrievers.")
352
+ return None
353
+ logger.info(f"Creating final ensemble with weights: {weights}")
354
+ return EnsembleRetriever(retrievers=retrievers, weights=weights)
355
+
356
    @staticmethod
    def process_single_page(args_tuple: tuple) -> dict | None:
        """Worker function for parallel PDF processing.

        Processes one page and returns a structured dictionary for that page:
        {"page_num": int, "content": {"chapter_id": int, "blocks": [...]}},
        or None when the page yields no documents or processing fails.
        """
        # Unpack arguments (still using a tuple as requested)
        pdf_bytes_buffer, page_num_idx, config, pdf_filename, page_to_chapter_id = (
            args_tuple
        )

        lc_documents = []
        # fitz pages are 0-indexed; the page_map / metadata use 1-based numbers.
        page_num = page_num_idx + 1

        try:
            # Each thread reopens the document from bytes — fitz handles are
            # not safe to share across threads.
            with fitz.open(stream=pdf_bytes_buffer, filetype="pdf") as doc:
                page = doc[page_num_idx]
                # 1. Extract raw, potentially fragmented text blocks
                raw_text_blocks = page.get_text("blocks", sort=True)

                # 2. Immediately merge fragmented blocks into paragraphs
                paragraph_blocks = KnowledgeBase._merge_text_blocks(raw_text_blocks)

                # 3. Render vector drawings above the minimum size as figures
                page_figures = []
                for fig_j, path_dict in enumerate(page.get_drawings()):
                    bbox = path_dict["rect"]
                    if (
                        bbox.is_empty
                        or bbox.width < config["FIGURE_MIN_WIDTH"]
                        or bbox.height < config["FIGURE_MIN_HEIGHT"]
                    ):
                        continue

                    # Pad the bounding box slightly, clamped to the page bounds
                    padded_bbox = bbox + (-2, -2, 2, 2)
                    padded_bbox.intersect(page.rect)
                    if padded_bbox.is_empty:
                        continue

                    pix = page.get_pixmap(clip=padded_bbox, dpi=150)
                    if pix.width > 0 and pix.height > 0:
                        img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                        img_path = (
                            config["IMAGE_DIR"]
                            / f"{Path(pdf_filename).stem}_p{page_num}_fig{fig_j + 1}.png"
                        )
                        img.save(img_path)
                        page_figures.append({
                            "bbox": bbox,
                            "path": str(img_path),
                            "id": f"Figure {fig_j + 1} on {pdf_filename}, page {page_num}",
                        })

                # 4. Normalize the clean PARAGRAPH blocks, dropping empty text
                text_blocks_on_page = [
                    {
                        "bbox": fitz.Rect(x0, y0, x1, y1),
                        "text": text.strip(),
                        "original_idx": b_idx,
                    }
                    for b_idx, (x0, y0, x1, y1, text, _, _) in enumerate(
                        paragraph_blocks
                    )
                    if text.strip()
                ]

                # 5. Link "Figure N ..." captions to figures and emit documents
                potential_captions = [
                    b
                    for b in text_blocks_on_page
                    if re.match(r"^\s*Figure\s*\d+", b["text"], re.I)
                ]
                mapped_caption_indices = set()
                for fig_data in page_figures:
                    cap_text, cap_idx = KnowledgeBase.find_best_caption_for_figure(
                        fig_data["bbox"], potential_captions
                    )
                    # Each caption may be claimed by at most one figure
                    if cap_text and cap_idx not in mapped_caption_indices:
                        mapped_caption_indices.add(cap_idx)
                        metadata = {
                            "source_pdf": pdf_filename,
                            "page_number": page_num,
                            "chunk_type": "figure-caption",
                            "linked_figure_path": fig_data["path"],
                            "linked_figure_id": fig_data["id"],
                            "block_id": f"{page_num}_{cap_idx}",
                            "original_block_text": cap_text,
                        }
                        lc_documents.append(
                            LangchainDocument(page_content=cap_text, metadata=metadata)
                        )

                # Remaining non-caption blocks become plain text documents,
                # unless they look like short header/footer noise
                for block_data in text_blocks_on_page:
                    if block_data["original_idx"] in mapped_caption_indices:
                        continue
                    if KnowledgeBase.should_filter_text_block(
                        block_data["text"],
                        block_data["bbox"],
                        page.rect.height,
                        config["CHUNK_FILTER_SIZE"],
                    ):
                        continue
                    metadata = {
                        "source_pdf": pdf_filename,
                        "page_number": page_num,
                        "chunk_type": "text_block",
                        "block_id": f"{page_num}_{block_data['original_idx']}",
                        "original_block_text": block_data["text"],
                    }
                    lc_documents.append(
                        LangchainDocument(
                            page_content=block_data["text"], metadata=metadata
                        )
                    )

        except Exception as e:
            logger.error(f"Error processing {pdf_filename} page {page_num}: {e}")
            return None

        if not lc_documents:
            return None

        # Restore reading order by the block index embedded in block_id
        lc_documents.sort(
            key=lambda d: int(d.metadata.get("block_id", "0_0").split("_")[-1])
        )

        return {
            "page_num": page_num,
            "content": {
                # -1 marks pages that fall outside any level-1 TOC chapter
                "chapter_id": page_to_chapter_id.get(page_num, -1),
                "blocks": lc_documents,
            },
        }
492
+
493
+ @staticmethod
494
+ def _merge_text_blocks(blocks: list) -> list:
495
+ """Intelligently merges fragmented text blocks into coherent paragraphs."""
496
+ if not blocks:
497
+ return []
498
+ merged_blocks = []
499
+ current_text = ""
500
+ current_bbox = fitz.Rect()
501
+ sentence_enders = {".", "?", "!", "•"}
502
+
503
+ for i, block in enumerate(blocks):
504
+ block_text = block[4].strip()
505
+ if not current_text: # Starting a new paragraph
506
+ current_bbox = fitz.Rect(block[:4])
507
+ current_text = block_text
508
+ else: # Continue existing paragraph
509
+ current_bbox.include_rect(block[:4])
510
+ current_text = f"{current_text} {block_text}"
511
+
512
+ is_last_block = i == len(blocks) - 1
513
+ ends_with_punctuation = block_text.endswith(tuple(sentence_enders))
514
+
515
+ if ends_with_punctuation or is_last_block:
516
+ merged_blocks.append((
517
+ current_bbox.x0,
518
+ current_bbox.y0,
519
+ current_bbox.x1,
520
+ current_bbox.y1,
521
+ current_text,
522
+ len(merged_blocks),
523
+ 0,
524
+ ))
525
+ current_text = ""
526
+ return merged_blocks
527
+
528
+ @staticmethod
529
+ def should_filter_text_block(
530
+ block_text: str,
531
+ block_bbox: fitz.Rect,
532
+ page_height: float,
533
+ filter_size: int,
534
+ ) -> bool:
535
+ """Determines if a text block from a header/footer should be filtered out."""
536
+ is_in_header_area = block_bbox.y0 < (page_height * 0.10)
537
+ is_in_footer_area = block_bbox.y1 > (page_height * 0.80)
538
+ is_short_text = len(block_text) < filter_size
539
+ return (is_in_header_area or is_in_footer_area) and is_short_text
540
+
541
+ @staticmethod
542
+ def find_best_caption_for_figure(
543
+ figure_bbox: fitz.Rect, potential_captions_on_page: list
544
+ ) -> tuple:
545
+ """Finds the best caption for a given figure based on proximity and alignment."""
546
+ best_caption_info = (None, -1)
547
+ min_score = float("inf")
548
+
549
+ for cap_info in potential_captions_on_page:
550
+ cap_bbox = cap_info["bbox"]
551
+ # Heuristic: Score captions directly below the figure
552
+ if cap_bbox.y0 >= figure_bbox.y1 - 10: # Caption starts below the figure
553
+ vertical_dist = cap_bbox.y0 - figure_bbox.y1
554
+ # Calculate horizontal overlap
555
+ overlap_x_start = max(figure_bbox.x0, cap_bbox.x0)
556
+ overlap_x_end = min(figure_bbox.x1, cap_bbox.x1)
557
+ if (
558
+ overlap_x_end - overlap_x_start
559
+ ) > 0: # If they overlap horizontally
560
+ fig_center_x = (figure_bbox.x0 + figure_bbox.x1) / 2
561
+ cap_center_x = (cap_bbox.x0 + cap_bbox.x1) / 2
562
+ horizontal_center_dist = abs(fig_center_x - cap_center_x)
563
+ # Score is a combination of vertical and horizontal distance
564
+ score = vertical_dist + (horizontal_center_dist * 0.5)
565
+ if score < min_score:
566
+ min_score = score
567
+ best_caption_info = (cap_info["text"], cap_info["original_idx"])
568
+ return best_caption_info
backend/rag/model_manager.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2025 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import logging
16
+ import os
17
+ import sys
18
+
19
+ import config
20
+ import nltk
21
+ import stanza
22
+ import torch
23
+ from langchain_community.embeddings import HuggingFaceEmbeddings
24
+
25
+ from .siglip_embedder import CustomSigLipEmbeddings
26
+
27
logger = logging.getLogger(__name__)
# Embedding model is injected via environment so deployments can swap models
# without code changes; load_models() treats None as a fatal misconfiguration.
EMBEDDING_MODEL_ID = os.environ.get("EMBEDDING_MODEL_ID", None)
29
+
30
+
31
class ModelManager:
    """Handles the expensive, one-time setup of downloading and loading all AI models required for RAG."""

    def __init__(self):
        # Model identifiers; the embedder id comes from the environment so it
        # can be swapped per deployment without code changes.
        self.embedding_model_id = EMBEDDING_MODEL_ID
        self.stanza_ner_package = "mimic"
        self.stanza_ner_processor = "i2b2"

    def load_models(self) -> dict:
        """
        Initializes and returns a dictionary of model components.

        Returns a dict with keys "embedder" and "ner_pipeline"; a failed NER
        setup leaves "ner_pipeline" as None, while a failed embedder load is
        fatal and exits the process.
        Note: The main LLM is accessed via API and is NOT loaded here.
        """
        logger.info("--- Initializing RAG-specific Models (Embedder, NER) ---")
        device = "cuda" if torch.cuda.is_available() else "cpu"
        logger.info(f"Using device: {device} for RAG models")

        models = {}

        # 1. Load Embedder (fatal on failure: nothing works without embeddings).
        try:
            if not self.embedding_model_id:
                # Bug fix: previously an unset EMBEDDING_MODEL_ID fell through
                # to the substring check below and raised an opaque
                # TypeError: argument of type 'NoneType' is not iterable.
                raise ValueError("EMBEDDING_MODEL_ID environment variable is not set")
            logger.info(f"Loading embedding model: {self.embedding_model_id}")
            if "siglip" in self.embedding_model_id:
                models["embedder"] = CustomSigLipEmbeddings(
                    siglip_model_name=self.embedding_model_id,
                    device=device,
                    normalize_embeddings=True,
                )
            else:
                models["embedder"] = HuggingFaceEmbeddings(
                    model_name=self.embedding_model_id,
                    model_kwargs={"device": device},
                    encode_kwargs={"normalize_embeddings": True},
                )
            logger.info("✅ Embedding model loaded successfully.")
        except Exception as e:
            logger.error(f"⚠️ Failed to load embedding model: {e}", exc_info=True)
            # Fatal: exit immediately. (The old `models['embedder'] = None`
            # assignment after this call was unreachable and has been removed.)
            sys.exit(1)

        # 2. Load Stanza for NER (non-fatal: the app degrades gracefully).
        try:
            logger.info("Downloading NLTK and Stanza models...")
            stanza.download(
                "en",
                package=self.stanza_ner_package,
                processors={"ner": self.stanza_ner_processor},
                verbose=False,
            )
            logger.info("✅ Stanza models downloaded.")

            logger.info("Loading Stanza NER Pipeline...")
            models["ner_pipeline"] = stanza.Pipeline(
                lang="en",
                package=self.stanza_ner_package,
                # Consistency fix: use the configured processor instead of a
                # second hard-coded "i2b2" literal.
                processors={"ner": self.stanza_ner_processor},
                use_gpu=torch.cuda.is_available(),
                verbose=False,
                tokenize_no_ssplit=True,
            )
            logger.info("✅ Stanza NER Pipeline loaded successfully.")
        except Exception as e:
            logger.error(f"⚠️ Failed to set up Stanza NER pipeline: {e}", exc_info=True)
            models["ner_pipeline"] = None

        if all(models.values()):
            logger.info("\n✅ All RAG-specific models initialized successfully.")
        else:
            logger.error("\n⚠️ One or more RAG models failed to initialize. Check errors above.")

        return models
backend/rag/rag_context_engine.py ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2025 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import logging
16
+ import os
17
+ from typing import List
18
+
19
+ from PIL import Image
20
+ from langchain.docstore.document import Document as LangchainDocument
21
+
22
+ from .knowledge_base import KnowledgeBase
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
def format_context_messages_to_string(context_messages: list[dict]) -> str:
    """Flatten a list of context message dicts into one newline-joined string.

    Only messages with type "text" contribute; image messages are skipped.
    An empty (or falsy) input yields a fixed "no context" sentence.
    """
    if not context_messages:
        return "No relevant context was retrieved from the guideline document."

    text_parts = []
    for message in context_messages:
        if message.get("type") == "text":
            text_parts.append(message.get("text", ""))
    return "\n".join(text_parts)
36
+
37
+
38
+ class RAGContextEngine:
39
+ """Uses a pre-built KnowledgeBase to retrieve and format context for queries."""
40
+
41
+ def __init__(self, knowledge_base: KnowledgeBase, config_overrides: dict | None = None):
42
+ if not isinstance(knowledge_base, KnowledgeBase) or not knowledge_base.retriever:
43
+ raise ValueError("An initialized KnowledgeBase with a built retriever is required.")
44
+ self.kb = knowledge_base
45
+ self.config = self._get_default_config()
46
+ if config_overrides:
47
+ self.config.update(config_overrides)
48
+
49
+ def _get_default_config(self):
50
+ return {
51
+ "FINAL_CONTEXT_TOP_K": 5,
52
+ "CONTEXT_SELECTION_STRATEGY": "chapter_aware_window_expansion",
53
+ "CONTEXT_WINDOW_SIZE": 0,
54
+ "ADD_MAPPED_FIGURES_TO_PROMPT": False,
55
+ }
56
+
57
+ def get_context_messages(self, query_text: str) -> list[dict] | None:
58
+ """Public API to get final, formatted context messages for a long query."""
59
+ final_context_docs = self.retrieve_context_docs(query_text)
60
+ if not final_context_docs:
61
+ logger.warning(f"No relevant context found for query: {query_text}")
62
+ return None
63
+ context_messages, _ = self.build_context_messages(final_context_docs)
64
+ return context_messages
65
+
66
+ def retrieve_context_docs(self, query_text: str) -> list:
67
+ """Handles both short and long queries to retrieve context documents."""
68
+ logger.info(f"Retrieving context documents with query: {query_text}")
69
+ if len(query_text.split()) > 5:
70
+ logger.info("Long query detected. Decomposing into sub-queries...")
71
+ temp_doc = LangchainDocument(page_content=query_text)
72
+ enriched_temp_docs = self.kb.document_enricher([temp_doc], summarize=False)
73
+ query_chunks_as_docs = self.kb.chunker(enriched_docs=enriched_temp_docs, display_results=False)
74
+ sub_queries = list(set([doc.page_content for doc in query_chunks_as_docs]))
75
+ else:
76
+ logger.info("Short query detected. Using direct retrieval.")
77
+ sub_queries = [query_text]
78
+ return self.retrieve_context_docs_for_simple_queries(sub_queries)
79
+
80
+ def get_context_messages_for_simple_queries(self, queries: list[str]) -> list:
81
+ """Retrieves context docs and builds them into formatted messages."""
82
+ final_context_docs = self.retrieve_context_docs_for_simple_queries(queries)
83
+ if not final_context_docs:
84
+ logger.warning(f"No relevant context found for queries: {queries}")
85
+ return []
86
+ context_messages, _ = self.build_context_messages(final_context_docs)
87
+ return context_messages
88
+
89
+ def retrieve_context_docs_for_simple_queries(self, queries: list[str]) -> list:
90
+ """Invokes the retriever for a list of simple queries and selects the final documents."""
91
+ logger.info(f"Retrieving context documents with simple queries: {queries}")
92
+ retrieved_docs = []
93
+ for query in queries:
94
+ docs = self.kb.retriever.invoke(query)
95
+ retrieved_docs.extend(docs)
96
+
97
+ return RAGContextEngine.select_final_context(
98
+ retrieved_docs=retrieved_docs,
99
+ config=self.config,
100
+ page_map=self.kb.page_map,
101
+ )
102
+
103
    def build_context_messages(
        self, docs: List[LangchainDocument]
    ) -> tuple[list[dict], list[Image.Image]]:
        """Builds a structured list of messages by grouping consecutive text blocks.

        Returns (messages, images): messages is a list of {"type": "text"|"image", ...}
        dicts in document order; images collects every PIL image that was inlined
        (empty unless ADD_MAPPED_FIGURES_TO_PROMPT is enabled).
        """
        if not docs:
            return [], []

        context_messages = []
        images_found = []
        # Consecutive prose blocks are buffered and emitted as one text message.
        prose_buffer = []

        def flush_prose_buffer():
            # Emit any accumulated prose as a single message, then reset.
            if prose_buffer:
                full_prose = "\n\n".join(prose_buffer)
                context_messages.append({"type": "text", "text": full_prose})
                prose_buffer.clear()

        add_images = self.config.get("ADD_MAPPED_FIGURES_TO_PROMPT", False)
        for i, doc in enumerate(docs):
            current_page = doc.metadata.get("page_number")
            # A page change (or an inlined figure caption) closes the current
            # prose group so each message stays attributable to one source page.
            is_new_page = (i > 0) and (current_page != docs[i - 1].metadata.get("page_number"))
            is_caption = doc.metadata.get("chunk_type") == "figure-caption"

            if is_new_page or (add_images and is_caption):
                flush_prose_buffer()

            if add_images and is_caption:
                # Caption text (with page attribution) immediately followed by its image.
                source_info = f"--- Source: Page {current_page} ---"
                caption_text = f"{source_info}\n{doc.page_content}"
                context_messages.append({"type": "text", "text": caption_text})
                image_path = doc.metadata.get("linked_figure_path")
                if image_path and os.path.exists(image_path):
                    try:
                        image = Image.open(image_path).convert("RGB")
                        context_messages.append({"type": "image", "image": image})
                        images_found.append(image)
                    except Exception as e:
                        # An unreadable figure degrades to caption-only context.
                        logger.warning(f"Could not load image {image_path}: {e}")
            else:
                # First block of a prose group carries the page header.
                if not prose_buffer:
                    source_info = f"--- Source: Page {current_page} ---"
                    prose_buffer.append(f"\n{source_info}\n")
                prose_buffer.append(doc.page_content)

        # Emit any trailing prose left in the buffer.
        flush_prose_buffer()
        return context_messages, images_found
149
+
150
    @staticmethod
    def select_final_context(retrieved_docs: list, config: dict, page_map: dict) -> list:
        """Selects final context from retrieved documents using the specified strategy.

        Supported CONTEXT_SELECTION_STRATEGY values:
          - "chapter_aware_window_expansion": expand the most-hit pages into
            full pages (plus a window of neighbors within the same chapter)
            pulled from page_map.
          - "rerank_by_frequency": keep the top_k blocks hit most often,
            expanded back to their full original block text.
          - "select_by_rank": first top_k unique chunks in retrieval order,
            expanded to full block text.
        Anything else falls back to the raw top_k retrieved chunks.
        """
        strategy = config.get("CONTEXT_SELECTION_STRATEGY")
        top_k = config.get("FINAL_CONTEXT_TOP_K", 5)

        def _calculate_block_frequencies(docs_list: list) -> list:
            # Group chunks by source block_id and rank blocks by how many
            # retrieved chunks landed in them (a simple relevance vote).
            blocks = {}
            for doc in docs_list:
                if block_id := doc.metadata.get("block_id"):
                    if block_id not in blocks:
                        blocks[block_id] = []
                    blocks[block_id].append(doc)
            return sorted(blocks.items(), key=lambda item: len(item[1]), reverse=True)

        def _expand_chunks_to_blocks(chunks: list) -> list:
            # Swap each chunk's text for the full original block it came from.
            return [
                LangchainDocument(
                    page_content=c.metadata.get("original_block_text", c.page_content),
                    metadata=c.metadata,
                )
                for c in chunks
            ]

        final_context = []
        if strategy == "chapter_aware_window_expansion":
            if not retrieved_docs or not page_map:
                return []

            scored_blocks = _calculate_block_frequencies(retrieved_docs)
            if not scored_blocks:
                # No block_ids available: degrade gracefully to raw top-k expansion.
                return _expand_chunks_to_blocks(retrieved_docs[:top_k])

            # The page holding the most-hit block is emitted first.
            primary_hit_page = scored_blocks[0][1][0].metadata.get("page_number")
            important_pages = {
                c[0].metadata.get("page_number")
                for _, c in scored_blocks[:top_k]
                if c and c[0].metadata.get("page_number")
            }

            # Expand each important page by +/- window_size neighbors, but only
            # while the neighbor stays inside the same chapter.
            window_size = config.get("CONTEXT_WINDOW_SIZE", 0)
            pages_to_extract = set()
            for page_num in important_pages:
                current_chapter_info = page_map.get(page_num)
                if not current_chapter_info:
                    continue
                current_chapter_id = current_chapter_info["chapter_id"]
                pages_to_extract.add(page_num)
                for i in range(1, window_size + 1):
                    if (prev_info := page_map.get(page_num - i)) and prev_info["chapter_id"] == current_chapter_id:
                        pages_to_extract.add(page_num - i)
                    if (next_info := page_map.get(page_num + i)) and next_info["chapter_id"] == current_chapter_id:
                        pages_to_extract.add(page_num + i)

            sorted_pages = sorted(list(pages_to_extract))
            if primary_hit_page and primary_hit_page in page_map:
                final_context.extend(page_map[primary_hit_page]["blocks"])
            for page_num in sorted_pages:
                if page_num != primary_hit_page and page_num in page_map:
                    final_context.extend(page_map[page_num]["blocks"])

        elif strategy == "rerank_by_frequency":
            scored_blocks = _calculate_block_frequencies(retrieved_docs)
            # One representative chunk per block, expanded to the full block text.
            representative_chunks = [chunks[0] for _, chunks in scored_blocks[:top_k]]
            final_context = _expand_chunks_to_blocks(representative_chunks)

        elif strategy == "select_by_rank":
            # De-duplicate by (block_id, content) while preserving insertion
            # (retrieval) order, then keep the first top_k.
            unique_docs_map = {f"{doc.metadata.get('block_id', '')}_{doc.page_content}": doc for doc in retrieved_docs}
            representative_chunks = list(unique_docs_map.values())[:top_k]
            final_context = _expand_chunks_to_blocks(representative_chunks)

        else:
            logger.warning(f"Unknown strategy '{strategy}'. Defaulting to top-k raw chunks.")
            final_context = retrieved_docs[:top_k]

        logger.info(f"Selected {len(final_context)} final context blocks using '{strategy}' strategy.")
        return final_context
backend/rag/siglip_embedder.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2025 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import os
16
+ from typing import List
17
+
18
+ import torch
19
+ import torch.nn.functional as F
20
+ from langchain.embeddings.base import Embeddings
21
+ from transformers import AutoModel, AutoTokenizer
22
+
23
+ HF_TOKEN = os.environ.get("HF_TOKEN", None)
24
+
25
+
26
class CustomSigLipEmbeddings(Embeddings):
    """LangChain-compatible text embedder backed by a SigLIP model.

    Subclassing LangChain's `Embeddings` base class guarantees the two
    methods LangChain vector stores rely on — `embed_documents` and
    `embed_query` — are provided; every vector is optionally L2-normalized.
    """

    def __init__(self, siglip_model_name: str, device: str = "cpu", normalize_embeddings: bool = True):
        super().__init__()
        # Tokenizer and model are fetched from the Hub once, at construction
        # time; HF_TOKEN (if set) authorizes access to gated checkpoints.
        self.tokenizer = AutoTokenizer.from_pretrained(siglip_model_name, token=HF_TOKEN)
        self.model = AutoModel.from_pretrained(siglip_model_name, token=HF_TOKEN).to(device)
        self.device = device
        self.normalize_embeddings = normalize_embeddings

    def _embed(self, texts: List[str]) -> torch.Tensor:
        """Tokenize `texts` and return their (optionally L2-normalized) feature tensor."""
        batch = self.tokenizer(
            texts, padding="max_length", truncation=True, max_length=64, return_tensors="pt"
        ).to(self.device)

        # Inference only — no gradients needed.
        with torch.no_grad():
            features = self.model.get_text_features(**batch)

        return F.normalize(features, p=2, dim=1) if self.normalize_embeddings else features

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Generate normalized embeddings for a list of documents."""
        return self._embed(texts).cpu().numpy().tolist()

    def embed_query(self, text: str) -> List[float]:
        """Generate a normalized embedding for a single query text."""
        return self._embed([text])[0].cpu().numpy().tolist()
backend/requirements.txt ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2025 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ # --- Core Flask Application ---
16
+ flask
17
+ gunicorn
18
+ requests
19
+ pillow
20
+ diskcache
21
+ google-auth
22
+
23
+ # --- RAG: PDF and Text Processing ---
24
+ PyMuPDF # For parsing PDF files (fitz)
25
+ nltk # For NLTKTextSplitter
26
+ tqdm # For progress bars during knowledge base build
27
+
28
+ # --- RAG: LangChain Components ---
29
+ langchain
30
+ langchain_community
31
+ langchain-huggingface
32
+ langchain-text-splitters
33
+ chromadb # Vector store for embeddings
34
+ rank_bm25 # For the BM25 sparse retriever
35
+
36
+ # --- RAG: ML/NLP Models & Frameworks ---
37
+ # User-specified versions for reproducibility
38
+ torch==2.6.0
39
+ numpy==2.0.2
40
+
41
+ # Libraries for loading and running the embedding and NER models
42
+ sentence-transformers
43
+ transformers
44
+ accelerate
45
+ bitsandbytes
46
+ sentencepiece
47
+ stanza # For NER (Named Entity Recognition)
backend/routes.py ADDED
@@ -0,0 +1,218 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2025 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import logging
16
+ import os
17
+ import shutil # For zipping the cache directory
18
+ from dataclasses import asdict
19
+ from functools import wraps
20
+ from pathlib import Path
21
+
22
+ from flask import Blueprint, request, jsonify, current_app, send_from_directory
23
+
24
+ import case_util
25
+ import config
26
+ from background_task_manager import BackgroundTaskManager
27
+ from models import ConversationTurn
28
+
29
+ # Use pathlib to construct the path to the images directory
30
+ # This is more robust than relative string paths.
31
+ IMAGE_DIR = Path(__file__).parent / 'data/images'
32
+
33
+ main_bp = Blueprint('main', __name__)
34
+ logger = logging.getLogger(__name__)
35
+
36
@main_bp.after_request
def log_full_cycle(response):
    """After-request hook: record any non-200 response together with the
    method and path of the request that produced it."""
    if response.status_code == 200:
        # Happy path — nothing worth logging.
        return response
    logger.error(
        f"Request: {request.method} {request.path} | "
        f"Response Status: {response.status}"
    )
    # Flask requires after_request handlers to return the response object.
    return response
49
+
50
@main_bp.route('/api/case/<case_id>/stub', methods=['GET'])
def get_case(case_id):
    """Return the static stub data for a single case as JSON.

    Responds 400 when the case id is unknown.
    """
    available_reports = current_app.config["AVAILABLE_REPORTS"]
    # Single dict lookup instead of an `in` membership test followed by
    # a second .get() on the same key.
    case = available_reports.get(case_id)
    if case is None:
        logger.error(f"Case Id {case_id} does not exist.")
        return jsonify({"error": f"Case Id {case_id} does not exist."}), 400

    return jsonify(asdict(case))
58
+
59
+
60
@main_bp.route('/api/case/stub', methods=['GET'])
def get_cases():
    """Return the stub data of every available case as a JSON array."""
    reports = current_app.config["AVAILABLE_REPORTS"]
    return jsonify([asdict(case) for case in reports.values()])
65
+
66
+
67
def rag_initialization_complete_required(f):
    """Decorator that gates a route until the background RAG setup finishes.

    Returns 500 if the "rag_system" task recorded an error, 503 (with a
    Retry-After hint) while it is still running, and otherwise delegates to
    the wrapped view unchanged.
    """

    @wraps(f)
    def wrapper(*args, **kwargs):
        task_manager: BackgroundTaskManager = current_app.config.get('TASK_MANAGER')

        # Hard failure during startup: surface it as a server error.
        if task_manager.get_error("rag_system"):
            return jsonify({"error": "A critical background task failed. Check application logs."}), 500

        # Still warming up: tell the client to come back later.
        if not task_manager.is_task_done("rag_system"):
            logger.warning("RAG initialization is running..")
            response = jsonify(
                {"status": "initializing", "message": "The system is starting up. Please try again in 60 seconds."})
            response.headers['Retry-After'] = 60
            return response, 503

        return f(*args, **kwargs)

    return wrapper
87
+
88
+
89
@main_bp.route('/api/case/<case_id>/all-questions', methods=['GET'])
@rag_initialization_complete_required
def get_all_questions(case_id):
    """Return every MCQ for a case, serving from cache when possible.

    Flow: (1) return cached questions when caching is enabled and they
    exist; (2) otherwise generate them live via the LLM client, using any
    prefetched RAG guideline context; (3) write the fresh questions back to
    the cache. Answer choices are re-randomized on every response.
    """
    logger.info(f"Retrieve all questions for the given case '{case_id}'")

    cache_manager = current_app.config['DEMO_CACHE']
    # 1. Check the cache first
    if config.USE_CACHE and cache_manager:
        all_mcqs_sequence = cache_manager.get_all_mcqs_sequence(case_id)
        if all_mcqs_sequence:  # idiomatic truthiness instead of len(...) > 0
            logger.info(f"CACHE HIT for case '{case_id}'")
            randomized_choices_mcqs = case_util.randomize_mcqs(all_mcqs_sequence)
            return jsonify([asdict(mcq) for mcq in randomized_choices_mcqs])

    # 2. CACHE MISS: Generate live
    logger.info(
        f"CACHE MISS or cache disabled for case '{case_id}'. Generating live question...")

    llm_client = current_app.config['LLM_CLIENT']
    if not llm_client:
        logger.error(
            "LLM client (REST API) not initialized. Cannot process request.")
        return jsonify({"error": "LLM client not initialized."}), 500

    static_case_info = current_app.config['AVAILABLE_REPORTS'].get(case_id)
    if not static_case_info:
        logger.error(f"Static case data for id {case_id} not found.")
        return jsonify({"error": f"Static case data for id {case_id} not found."}), 404

    rag_cache = current_app.config.get('RAG_CONTEXT_CACHE', {})
    prefetched_data = rag_cache.get(case_id, {})
    guideline_context_string = prefetched_data.get("context_string", "")

    live_generated_mcqs = llm_client.generate_all_questions(
        case_data=asdict(static_case_info),
        guideline_context=guideline_context_string
    )

    # Guard clause instead of `is not None and len(...) > 0` plus else-branch.
    if not live_generated_mcqs:
        logger.error("MCQ Sequence generation failed.")
        return jsonify(
            {"error": "MCQ Sequence generation failed."}), 500

    # 3. WRITE-THROUGH: Update the cache with the new questions if caching is enabled
    if config.USE_CACHE and cache_manager:
        cache_manager.add_all_mcqs_to_case(case_id, live_generated_mcqs)
    randomized_choices_mcqs = case_util.randomize_mcqs(live_generated_mcqs)
    return jsonify([asdict(mcq) for mcq in randomized_choices_mcqs]), 200
138
+
139
+
140
@main_bp.route('/api/case/<case_id>/summarize', methods=['POST'])
@rag_initialization_complete_required
def get_case_summary(case_id):
    """Generate a case summary from the posted conversation history.

    First tries to load a pre-built summary template from the cache; on a
    miss (or with caching disabled) it builds the template on the fly and
    writes it back. The template is then populated with rationale derived
    from the user's conversation turns.

    Request body: JSON with a non-empty 'conversation_history' list.
    Responds 400 on a missing history, 404 on an unknown case id, and 500
    on any internal failure.
    """
    data = request.get_json(force=True)
    conversation_history_data = data.get('conversation_history')
    if not conversation_history_data:
        logger.error(f"Missing 'conversation_history' in request body for case {case_id}.")
        return jsonify({"error": f"Missing 'conversation_history' in request body for case {case_id}."}), 400

    try:
        summary_template = None
        # First, try to get the summary from the cache, if caching is enabled
        cache_manager = current_app.config.get('DEMO_CACHE')
        if cache_manager:
            summary_template = cache_manager.get_summary_template(case_id)
            if summary_template:
                logger.info(f"Summary template for case {case_id} found in cache.")

        # If cache is disabled OR the template was not in the cache, build it now
        if summary_template is None:
            logger.warning(f"Summary template for case {case_id} not in cache or cache disabled. Building on the fly.")
            static_case_info = current_app.config['AVAILABLE_REPORTS'].get(case_id)
            if not static_case_info:
                logger.error(f"Static case data for case {case_id} not found.")
                return jsonify({"error": f"Static case data for case {case_id} not found."}), 404
            summary_template = case_util.build_summary_template(static_case_info,
                                                                current_app.config.get('RAG_CONTEXT_CACHE', {}))
            if cache_manager:
                cache_manager.save_summary_template(case_id, summary_template)

        if summary_template is None:
            logger.error(f"Summary template not found for case {case_id}.")
            # Fixed: was an f-string with no placeholders (ruff F541).
            return jsonify({"error": "An internal error occurred."}), 500

        # Once summary template is ready, we can programmatically populate
        # rationale based on the user's journey.
        conversation_turns = [ConversationTurn.from_dict(turn) for turn in conversation_history_data]
        summary = case_util.populate_rationale(summary_template, conversation_turns)
        return jsonify(asdict(summary)), 200
    except Exception as e:
        logger.error(f"Error generating summary for case {case_id}: {e}", exc_info=True)
        # NOTE(review): echoing the exception text to the client can leak
        # internal details; consider returning a generic message instead.
        return jsonify({"error": f"An internal error occurred: {e}"}), 500
185
+
186
+
187
@main_bp.route('/app/download_cache')
@rag_initialization_complete_required
def download_cache_zip():
    """Zips the cache directory and serves it for download.

    Responds 500 when the cache directory is missing or archiving fails.
    """
    zip_filename = "rad-learn-cache.zip"
    # Create the zip file in a temporary directory.
    # Using /tmp is common in containerized environments.
    temp_dir = "/tmp"
    zip_base_path = os.path.join(temp_dir, "rad-learn-cache")  # shutil adds .zip
    zip_filepath = zip_base_path + ".zip"

    # Ensure the cache directory exists before trying to zip it
    cache_manager = current_app.config.get('DEMO_CACHE')
    cache_directory = cache_manager.cache_directory

    if not os.path.isdir(cache_directory):
        logger.error(f"Cache directory not found at {cache_directory}")
        return jsonify({"error": f"Cache directory not found on server: {cache_directory}"}), 500

    try:
        logger.info(f"Creating zip archive of cache directory: {cache_directory} to {zip_filepath}")
        shutil.make_archive(
            zip_base_path,  # This is the base name, shutil adds the .zip extension
            "zip",
            cache_directory,  # This is the root directory to archive
        )
        logger.info("Zip archive created successfully.")
        # NOTE(review): the archive is NOT deleted after sending — it stays in
        # /tmp and is simply overwritten by make_archive on the next request.
        return send_from_directory(temp_dir, zip_filename, as_attachment=True)
    except Exception as e:
        logger.error(f"Error creating or sending zip archive of cache directory: {e}", exc_info=True)
        return jsonify({"error": f"Error creating or sending zip archive: {e}"}), 500
frontend/index.html ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!--
2
+ # Copyright 2025 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ -->
16
+
17
+ <!doctype html>
18
+ <html lang="en">
19
+ <head>
20
+ <meta charset="UTF-8"/>
21
+ <link href="/vite.svg" rel="icon" type="image/svg+xml"/>
22
+ <meta content="width=device-width, initial-scale=1.0" name="viewport"/>
23
+ <title>Flask + React App</title>
24
+ </head>
25
+ <body>
26
+ <div id="root"></div>
27
+ <script src="/src/main.jsx" type="module"></script>
28
+ </body>
29
+ </html>
frontend/package.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "flask-react-frontend",
3
+ "private": true,
4
+ "version": "0.0.0",
5
+ "type": "module",
6
+ "scripts": {
7
+ "dev": "vite",
8
+ "build": "vite build",
9
+ "preview": "vite preview"
10
+ },
11
+ "dependencies": {
12
+ "react": "^18.2.0",
13
+ "react-dom": "^18.2.0",
14
+ "react-tooltip": "^5.29.1"
15
+ },
16
+ "devDependencies": {
17
+ "@types/react": "^18.2.15",
18
+ "@types/react-dom": "^18.2.7",
19
+ "@vitejs/plugin-react": "^4.0.3",
20
+ "vite": "^4.4.5"
21
+ }
22
+ }
frontend/public/index.html ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!--
2
+ # Copyright 2025 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ -->
16
+
17
+ <!doctype html>
18
+ <html lang="en">
19
+ <head>
20
+ <meta charset="UTF-8"/>
21
+ <link href="/vite.svg" rel="icon" type="image/svg+xml"/>
22
+ <meta content="width=device-width, initial-scale=1.0" name="viewport"/>
23
+ <title>React Multi-Screen App</title>
24
+ <link href="https://fonts.googleapis.com" rel="preconnect">
25
+ <link crossorigin href="https://fonts.gstatic.com" rel="preconnect">
26
+ <link href="https://fonts.googleapis.com/css2?family=Google+Sans:wght@400;500;700&family=Google+Sans+Text:ital,wght@0,400;0,500;0,700;1,500&display=swap"
27
+ rel="stylesheet">
28
+ <link href="https://fonts.googleapis.com/css2?family=Material+Symbols+Outlined:opsz,wght,FILL,GRAD@20..48,100..700,0..1,-50..200"
29
+ rel="stylesheet"/>
30
+ </head>
31
+ <body>
32
+ <div id="root"></div>
33
+ <script src="/src/main.jsx" type="module"></script>
34
+ </body>
35
+ </html>
frontend/public/vite.svg ADDED
frontend/src/App.css ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ # Copyright 2025 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ */
16
+
17
+ :root {
18
+ --font-family-text: 'Google Sans Text', 'Segoe UI', Roboto, sans-serif;
19
+ --font-family-display: 'Google Sans', 'Segoe UI', Roboto, sans-serif;
20
+
21
+ /* Font Sizes */
22
+ --font-size-xs: 12px;
23
+ --font-size-sm: 14px;
24
+ --font-size-md: 16px;
25
+ --font-size-lg: 20px;
26
+ --font-size-xl: 22px;
27
+ --font-size-xxl: 24px;
28
+ --font-size-xxxl: 32px;
29
+
30
+ /* Font Weights */
31
+ --font-weight-regular: 400;
32
+ --font-weight-medium: 500;
33
+ --font-weight-bold: 700;
34
+ }
35
+
36
+ .app-container {
37
+ width: 100%;
38
+ min-height: 100vh;
39
+ display: grid;
40
+ place-items: center;
41
+ background-color: #ffffff;
42
+ color: #1a1d21;
43
+ font-family: var(--font-family-text);
44
+ overflow: auto;
45
+ box-sizing: border-box;
46
+ }
47
+
48
+ .material-symbols-outlined {
49
+ font-family: 'Material Symbols Outlined', sans-serif;
50
+ font-weight: normal;
51
+ font-style: normal;
52
+ font-size: 24px;
53
+ line-height: 1;
54
+ letter-spacing: normal;
55
+ text-transform: none;
56
+ display: inline-block;
57
+ white-space: nowrap;
58
+ word-wrap: normal;
59
+ direction: ltr;
60
+ -webkit-font-feature-settings: 'liga';
61
+ -webkit-font-smoothing: antialiased;
62
+ }
63
+
64
+ .custom-tooltip.react-tooltip {
65
+ max-width: 300px;
66
+ background: linear-gradient(135deg, #2a2f3a, #1e1f26);
67
+ color: #f8f9fa;
68
+ font-family: 'Inter', system-ui, sans-serif;
69
+ font-size: 14px;
70
+ line-height: 1.6;
71
+ padding: 12px 16px;
72
+ border-radius: 10px;
73
+ box-shadow: 0 8px 24px rgba(0, 0, 0, 0.25),
74
+ 0 2px 8px rgba(0, 0, 0, 0.15);
75
+ border: 1px solid rgba(255, 255, 255, 0.05);
76
+ white-space: normal;
77
+ word-wrap: break-word;
78
+ opacity: 1;
79
+ z-index: 9999;
80
+ backdrop-filter: blur(6px);
81
+ transition: opacity 0.2s ease, transform 0.2s ease;
82
+ }
83
+
84
+ .custom-tooltip.react-tooltip [data-popper-arrow] {
85
+ width: 10px;
86
+ height: 10px;
87
+ background: inherit;
88
+ transform: rotate(45deg);
89
+ z-index: -1;
90
+ }
frontend/src/App.jsx ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ # Copyright 2025 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ */
16
+
17
+
18
+ import React, {useState} from 'react';
19
+ import LandingScreen from './screens/LandingScreen';
20
+ import JourneySelectionScreen from './screens/JourneySelectionScreen';
21
+ import ChatScreen from './screens/ChatScreen';
22
+ import SummaryScreen from './screens/SummaryScreen';
23
+ import DetailsOverlay from './components/DetailsOverlay';
24
+ import {Tooltip} from 'react-tooltip'; // ADD THIS
25
+ import 'react-tooltip/dist/react-tooltip.css'; // ADD THIS
26
+
27
// Root component: owns the current screen, the selected journey, the
// per-case image cache, and the visibility of the details overlay, and
// renders the screen matching the current navigation state.
function App() {
  const [currentScreen, setCurrentScreen] = useState('landing');
  const [selectedJourney, setSelectedJourney] = useState(null);
  const [isDetailsOverlayVisible, setIsDetailsOverlayVisible] = useState(false);
  const [caseImagesCache, setCaseImagesCache] = useState({});
  const [summaryData, setSummaryData] = useState(null);

  // Start a journey: remember it, drop any stale summary, go to chat.
  const handleLaunchJourney = (journey) => {
    setSelectedJourney(journey);
    setSummaryData(null);
    setCurrentScreen('chat');
  };

  const handleNavigate = (screen) => {
    setCurrentScreen(screen);
  };

  const handleShowDetails = (show) => {
    setIsDetailsOverlayVisible(show);
  };

  // Memoize a loaded case image by case id so re-entering a screen
  // does not refetch it.
  const updateImageCache = (caseId, imageUrl) => {
    setCaseImagesCache(prevCache => ({
      ...prevCache,
      [caseId]: imageUrl
    }));
  };

  const handleGoToSummary = (data) => {
    setSummaryData(data);
    setCurrentScreen('summary');
  };

  // Pick the screen component for the current navigation state.
  const renderScreen = () => {
    const screenProps = {
      onNavigate: handleNavigate,
      onShowDetails: () => handleShowDetails(true)
    };

    switch (currentScreen) {
      case 'journeySelection':
        return <JourneySelectionScreen {...screenProps} onLaunchJourney={handleLaunchJourney}/>;
      case 'chat':
        return (
          <ChatScreen
            {...screenProps}
            journey={selectedJourney}
            cachedImage={caseImagesCache[selectedJourney?.id]}
            onImageLoad={(imageUrl) => updateImageCache(selectedJourney.id, imageUrl)}
            onGoToSummary={handleGoToSummary}
          />
        );
      case 'summary':
        return (
          <SummaryScreen
            {...screenProps}
            journey={selectedJourney}
            cachedImage={caseImagesCache[selectedJourney?.id]}
            summaryData={summaryData}
          />
        );
      case 'landing':
      default:
        return <LandingScreen
          onStartJourney={() => handleNavigate('journeySelection')}
          onShowDetails={screenProps.onShowDetails}
        />;
    }
  };

  return (
    <div className="app-container">
      {renderScreen()}
      {isDetailsOverlayVisible && <DetailsOverlay onClose={() => handleShowDetails(false)}/>}

      {/* Single app-wide tooltip anchored to any .tooltip-trigger element. */}
      <Tooltip anchorSelect=".tooltip-trigger" className="custom-tooltip" arrow={true}/>
    </div>
  );
}

export default App;
frontend/src/assets/home_chest_logo.jpg ADDED
frontend/src/components/ChatMessage.jsx ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ # Copyright 2025 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ */
16
+
17
+ import React from 'react';
18
+ import styles from './ChatMessage.module.css';
19
+ import IconAstrophotography from '../icons/IconAstrophotography';
20
+ import IconPerson from '../icons/IconPerson';
21
+
22
+ const ChatMessage = ({type, text, children}) => {
23
+ const isUser = type === 'user';
24
+ const wrapperClass = isUser ? styles.userMessageWrapper : styles.systemMessageWrapper;
25
+ const bubbleClass = isUser ? styles.userMessageBox : styles.systemMessageBox;
26
+ const icon = isUser ? <IconPerson className={styles.avatarIcon}/> :
27
+ <IconAstrophotography className={styles.avatarIcon}/>;
28
+
29
+ return (
30
+ <div className={wrapperClass}>
31
+ {!isUser && icon}
32
+ <div className={bubbleClass}>
33
+ {text || children}
34
+ </div>
35
+ {isUser && icon}
36
+ </div>
37
+ );
38
+ };
39
+
40
+ export default ChatMessage;
frontend/src/components/ChatMessage.module.css ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ # Copyright 2025 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ */
16
+
17
+ .systemMessageWrapper, .userMessageWrapper {
18
+ display: flex;
19
+ align-items: flex-start;
20
+ gap: 12px;
21
+ width: 100%;
22
+ }
23
+
24
+ .systemMessageWrapper {
25
+ justify-content: flex-start;
26
+ }
27
+
28
+ .userMessageWrapper {
29
+ justify-content: flex-end;
30
+ }
31
+
32
+ .avatarIcon {
33
+ width: 32px;
34
+ height: 32px;
35
+ flex-shrink: 0;
36
+ border-radius: 50%;
37
+ padding: 4px;
38
+ box-sizing: border-box;
39
+ }
40
+
41
+ .systemMessageWrapper .avatarIcon {
42
+ background-color: #C2E7FF;
43
+ color: #0B57D0;
44
+ }
45
+
46
+ .userMessageWrapper .avatarIcon {
47
+ background-color: #0B57D0;
48
+ color: #D3E3FD;
49
+ }
50
+
51
+ .systemMessageBox, .userMessageBox {
52
+ padding: 12px 16px;
53
+ border-radius: 18px;
54
+ max-width: 80%;
55
+ font-size: var(--font-size-md);
56
+ line-height: 1.5;
57
+ white-space: pre-wrap;
58
+ }
59
+
60
+ .systemMessageBox {
61
+ background-color: #E8F0FE;
62
+ color: #1a1d21;
63
+ border-top-left-radius: 4px;
64
+ }
65
+
66
+ .userMessageBox {
67
+ background-color: #E7E7E7;
68
+ color: #353535;
69
+ border-top-right-radius: 4px;
70
+ }
frontend/src/components/DetailsOverlay.jsx ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ # Copyright 2025 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ */
16
+
17
+ import React from 'react';
18
+ import styles from './DetailsOverlay.module.css';
19
+ import IconClose from '../icons/IconClose';
20
+
21
// Modal dialog with static "about this demo" content. Clicking the dimmed
// backdrop or the close button invokes `onClose`; clicks inside the dialog
// are swallowed so the overlay stays open.
const DetailsOverlay = ({onClose}) => {
  return (
    <div className={styles.dialogOverlay} onClick={onClose} role="dialog" aria-modal="true"
         aria-labelledby="dialog-title">
      {/* stopPropagation keeps inner clicks from reaching the backdrop handler */}
      <div className={styles.dialogBox} onClick={(e) => e.stopPropagation()}>
        <button id="dialog-close-button" className={styles.dialogCloseBtn} aria-label="Close dialog" onClick={onClose}>
          <IconClose/>
        </button>
        <h2 id="dialog-title" className={styles.dialogTitleText}>Details About This Demo</h2>
        <div className={styles.dialogBodyScrollable}>
          <p>
            <b>The Model:</b> This demo features Google's MedGemma-27B, a Gemma 3-based model fine-tuned for
            comprehending medical text and images, specifically Chest X-Rays. It demonstrates MedGemma's ability to
            facilitate the learning process for medical students by advanced interpretation of medical images and
            contextual question generation while leveraging clinical guidelines. Context from clinical guidelines are
            generated using RAG which utilizes Google's MedSigLIP embedding model to build a vector index database.
          </p>
          <p>
            <b>Accessing and Using the Model:</b> Google's MedGemma-27B is available on{' '}
            <a href="https://huggingface.co/google/medgemma-27b-it" target="_blank" rel="noopener noreferrer">
              HuggingFace<img className={styles.inlineLogo}
                              src="https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.svg"
                              alt="Hugging Face Logo"/>
            </a>{' '}
            and{' '}
            <a href="https://console.cloud.google.com/vertex-ai/publishers/google/model-garden/medgemma" target="_blank"
               rel="noopener noreferrer">
              Model Garden <img className={styles.inlineLogo}
                                src="https://www.gstatic.com/cloud/images/icons/apple-icon.png"
                                alt="Model Garden Logo"/>
            </a>.
            Learn more about using the model and its limitations on the{' '}
            <a href="https://developers.google.com/health-ai-developer-foundations?referral=rad_learning_companion"
               target="_blank" rel="noopener noreferrer">
              HAI-DEF developer site
            </a>.
          </p>
          <p>
            <b>Health AI Developer Foundations (HAI-DEF):</b> Provides a collection of open-weight models and companion
            resources to empower developers in building AI models for healthcare.
          </p>
          <p>
            <b>Enjoying the Demo?</b> We'd love your feedback! If you found this demo helpful, please show your
            appreciation by clicking the ❤️ button on the HuggingFace page, linked at the top.
          </p>
          <p>
            <b>Explore More Demos:</b> Discover additional demos on HuggingFace Spaces or via Colabs:
          </p>
          <ul>
            <li>
              <a href="https://huggingface.co/spaces/google/cxr-foundation-demo" target="_blank"
                 rel="noopener noreferrer">
                CXR Foundations Demo <img className={styles.inlineLogo}
                                          src="https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.svg"
                                          alt="Hugging Face Logo"/>
              </a>{' '}
              - Showcases on-browser, data-efficient, and zero-shot classification of CXR images.
            </li>
            <li>
              <a href="https://huggingface.co/spaces/google/path-foundation-demo" target="_blank"
                 rel="noopener noreferrer">
                Path Foundations Demo <img className={styles.inlineLogo}
                                           src="https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.svg"
                                           alt="Hugging Face Logo"/>
              </a>{' '}
              - Highlights on-browser, data-efficient classification and outlier detection within pathology slides.
            </li>
            <li>
              <a href="https://huggingface.co/spaces/google/rad_explain" target="_blank" rel="noopener noreferrer">
                MedGemma Rad Explain <img className={styles.inlineLogo}
                                          src="https://upload.wikimedia.org/wikipedia/commons/thumb/5/58/Echo_link-blue_icon_slanted.svg/1920px-Echo_link-blue_icon_slanted.svg.png"
                                          alt="Link icon"/>
              </a>{' '}
              - Analyzes a radiology report and its corresponding CXR/CT image, generating AI explanations for selected
              sentences with visual context.
            </li>
            <li>
              <a href="https://github.com/Google-Health/medgemma/tree/main/notebooks/fine_tune_with_hugging_face.ipynb"
                 target="_blank" rel="noopener noreferrer">
                Finetune MedGemma Colab <img className={styles.inlineLogo}
                                             src="https://upload.wikimedia.org/wikipedia/commons/d/d0/Google_Colaboratory_SVG_Logo.svg"
                                             alt="Google Colab Logo"/>
              </a>{' '}
              - See an example of how to fine-tune this model.
            </li>
            <li>
              <a href="https://huggingface.co/spaces/google/appoint-ready" target="_blank" rel="noopener noreferrer">
                Simulated Pre-visit Intake <img className={styles.inlineLogo}
                                                src="https://upload.wikimedia.org/wikipedia/commons/thumb/5/58/Echo_link-blue_icon_slanted.svg/1920px-Echo_link-blue_icon_slanted.svg.png"
                                                alt="Link icon"/>
              </a>{' '}
              - Simulates a pre-visit patient dialogue, generating an intelligent intake report with self-evaluated
              insights for efficient provider use.
            </li>
          </ul>
        </div>
      </div>
    </div>
  );
};

export default DetailsOverlay;
frontend/src/components/DetailsOverlay.module.css ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ # Copyright 2025 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ */
16
+
17
+ .dialogOverlay {
18
+ position: fixed;
19
+ inset: 0;
20
+ background-color: rgba(0, 0, 0, 0.6);
21
+ display: flex;
22
+ justify-content: center;
23
+ align-items: center;
24
+ z-index: 1000;
25
+ animation: fadeIn 0.3s ease-out;
26
+ }
27
+
28
+ @keyframes fadeIn {
29
+ from {
30
+ opacity: 0;
31
+ }
32
+ to {
33
+ opacity: 1;
34
+ }
35
+ }
36
+
37
+ .dialogBox {
38
+ background: white;
39
+ border-radius: 12px;
40
+ box-shadow: 0 4px 20px rgba(0, 0, 0, 0.2);
41
+ width: 90%;
42
+ max-width: 600px;
43
+ max-height: 80vh;
44
+ display: flex;
45
+ flex-direction: column;
46
+ position: relative;
47
+ padding: 24px;
48
+ animation: slideUp 0.4s ease-out;
49
+ }
50
+
51
+ @keyframes slideUp {
52
+ from {
53
+ transform: translateY(20px);
54
+ opacity: 0;
55
+ }
56
+ to {
57
+ transform: translateY(0);
58
+ opacity: 1;
59
+ }
60
+ }
61
+
62
+ .dialogCloseBtn {
63
+ position: absolute;
64
+ top: 16px;
65
+ right: 16px;
66
+ background: none;
67
+ border: none;
68
+ cursor: pointer;
69
+ padding: 8px;
70
+ border-radius: 50%;
71
+ display: flex;
72
+ justify-content: center;
73
+ align-items: center;
74
+ transition: background-color 0.2s;
75
+ color: #5f6368;
76
+ }
77
+
78
+ .dialogCloseBtn:hover {
79
+ background-color: #f0f0f0;
80
+ }
81
+
82
+ .dialogTitleText {
83
+ font-family: var(--font-family-display);
84
+ font-size: var(--font-size-xl);
85
+ color: #202124;
86
+ text-align: left;
87
+ margin: 0;
88
+ padding-bottom: 16px;
89
+ border-bottom: 1px solid #e0e0e0;
90
+ }
91
+
92
+ .dialogBodyScrollable {
93
+ overflow-y: auto;
94
+ padding-top: 16px;
95
+ padding-right: 16px;
96
+ text-align: left;
97
+ }
98
+
99
+ .dialogBodyScrollable p {
100
+ font-size: var(--font-size-sm);
101
+ line-height: 1.6;
102
+ margin-bottom: 16px;
103
+ color: #444746;
104
+ }
105
+
106
+ .dialogBodyScrollable b {
107
+ font-weight: var(--font-weight-bold);
108
+ color: #202124;
109
+ }
110
+
111
+ .dialogBodyScrollable a {
112
+ color: #0B57D0;
113
+ text-decoration: underline;
114
+ font-weight: var(--font-weight-medium);
115
+ display: inline-flex;
116
+ align-items: center;
117
+ gap: 4px;
118
+ }
119
+
120
+ .dialogBodyScrollable a:hover {
121
+ text-decoration: underline;
122
+ }
123
+
124
+ .inlineLogo {
125
+ height: 16px;
126
+ width: auto;
127
+ vertical-align: middle;
128
+ margin-left: 2px;
129
+ }
130
+
131
+ .dialogBodyScrollable ul {
132
+ list-style-type: disc;
133
+ padding-left: 20px;
134
+ margin-top: -10px;
135
+ }
136
+
137
+ .dialogBodyScrollable li {
138
+ margin-bottom: 12px;
139
+ padding-left: 4px;
140
+ font-size: var(--font-size-sm);
141
+ line-height: 1.6;
142
+ color: #444746;
143
+ }
frontend/src/components/JourneyCard.jsx ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ # Copyright 2025 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ */
16
+
17
+ import React from 'react';
18
+ import styles from './JourneyCard.module.css';
19
+ import IconArticlePerson from '../icons/IconArticlePerson';
20
+
21
+ const JourneyCard = ({journey, onLaunch}) => {
22
+ return (
23
+ <div className={styles.card}>
24
+ <img src={journey.imageUrl} alt={journey.label} className={styles.cardImage}/>
25
+ <div className={styles.cardFooter}>
26
+ <span className={styles.label}>{journey.label}</span>
27
+ <button className={styles.launchButton} onClick={onLaunch}>
28
+ <IconArticlePerson className={styles.buttonIcon}/>
29
+ Launch
30
+ </button>
31
+ </div>
32
+ </div>
33
+ );
34
+ };
35
+
36
+ export default JourneyCard;
frontend/src/components/JourneyCard.module.css ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ # Copyright 2025 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ */
16
+
17
+ .card {
18
+ width: 100%;
19
+ max-width: 452px;
20
+ background: white;
21
+ border-radius: 14px;
22
+ border: 2px #E9E9E9 solid;
23
+ display: flex;
24
+ flex-direction: column;
25
+ justify-content: space-between;
26
+ transition: box-shadow 0.3s ease;
27
+ }
28
+
29
+ .card:hover {
30
+ box-shadow: 0 8px 16px rgba(0, 0, 0, 0.1);
31
+ }
32
+
33
+ .cardImage {
34
+ width: 100%;
35
+ aspect-ratio: 1 / 1;
36
+ object-fit: contain;
37
+ }
38
+
39
+ .cardFooter {
40
+ display: flex;
41
+ justify-content: space-between;
42
+ align-items: center;
43
+ padding: 16px 24px;
44
+ background-color: white;
45
+ border-top: 2px #E9E9E9 solid;
46
+ }
47
+
48
+ .label {
49
+ color: black;
50
+ font-size: var(--font-size-md);
51
+ font-weight: var(--font-weight-medium);
52
+ line-height: 24px;
53
+ }
54
+
55
+ .launchButton {
56
+ display: flex;
57
+ align-items: center;
58
+ justify-content: center;
59
+ gap: 8px;
60
+ padding: 8px 16px;
61
+ border-radius: 100px;
62
+ background: #0B57D0;
63
+ color: white;
64
+ border: none;
65
+ font-size: var(--font-size-md);
66
+ font-weight: var(--font-weight-medium);
67
+ font-family: var(--font-family-text);
68
+ cursor: pointer;
69
+ transition: background-color 0.2s;
70
+ }
71
+
72
+ .launchButton:hover {
73
+ background: #0a4ab5;
74
+ }
75
+
76
+ .buttonIcon {
77
+ width: 20px;
78
+ height: 20px;
79
+ fill: white;
80
+ }
frontend/src/components/MCQOption.jsx ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ # Copyright 2025 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ */
16
+
17
+ import React from 'react';
18
+ import IconRadioButton from '../icons/IconRadioButton';
19
+ import styles from './MCQOption.module.css';
20
+ import TextWithTooltips from './TextWithTooltips';
21
+
22
+ const MCQOption = ({text, onClick, disabled, isSelected, isIncorrect}) => {
23
+ const buttonClasses = [
24
+ styles.optionButton,
25
+ isSelected ? styles.selected : '',
26
+ isIncorrect ? styles.incorrect : ''
27
+ ].join(' ');
28
+
29
+ return (
30
+ <button
31
+ className={buttonClasses}
32
+ onClick={onClick}
33
+ disabled={disabled}
34
+ >
35
+ <IconRadioButton/>
36
+ <span><TextWithTooltips text={text}/></span>
37
+ </button>
38
+ );
39
+ };
40
+
41
+ export default MCQOption;
frontend/src/components/MCQOption.module.css ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ # Copyright 2025 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ */
16
+
17
+ .optionButton {
18
+ display: flex;
19
+ align-items: center;
20
+ gap: 12px;
21
+ width: 100%;
22
+ padding: 12px;
23
+ background-color: transparent;
24
+ border: 1px solid #E0E0E0;
25
+ border-radius: 8px;
26
+ text-align: left;
27
+ font-size: var(--font-size-md);
28
+ font-family: var(--font-family-text);
29
+ color: #333;
30
+ cursor: pointer;
31
+ transition: background-color 0.2s, border-color 0.2s;
32
+ }
33
+
34
+ .optionButton:hover:not(:disabled) {
35
+ background-color: #f0f2f5;
36
+ border-color: #0B57D0;
37
+ }
38
+
39
+ .optionButton:disabled {
40
+ cursor: not-allowed;
41
+ opacity: 0.7;
42
+ }
43
+
44
+ .optionButton.selected {
45
+ border-color: #0B57D0;
46
+ background-color: #e8f0fe;
47
+ }
48
+
49
+ .optionButton.incorrect {
50
+ background-color: #F8D7DA;
51
+ border-color: #F5C6CB;
52
+ color: #721C24;
53
+ text-decoration: line-through;
54
+ cursor: not-allowed;
55
+ opacity: 1;
56
+ }
frontend/src/components/RedactedTextView.js ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ # Copyright 2025 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ */
16
+
17
+ /**
18
+ * Replaces specified phrases (multi-word strings) in a text string with '__?___'.
19
+ * @param {string} inputText The text to process.
20
+ * @param {string[]} phrasesToRedact An array of phrases to be replaced.
21
+ * @returns {string} The text with specified phrases redacted.
22
+ */
23
+ export const redactPhrases = (inputText, phrasesToRedact) => {
24
+ if (!inputText || !phrasesToRedact || phrasesToRedact.length === 0) {
25
+ return inputText || "";
26
+ }
27
+
28
+ let processedText = inputText;
29
+
30
+ // Sort phrases by length (descending) to redact longer phrases first.
31
+ // This prevents issues where a shorter phrase is part of a longer one.
32
+ const sortedPhrases = phrasesToRedact.sort((a, b) => b.length - a.length);
33
+
34
+ sortedPhrases.forEach(phrase => {
35
+ // Create a global, case-insensitive regex for the current phrase.
36
+ const regex = new RegExp(phrase, 'gi');
37
+ // Replace the found phrase with 'X's of the same length.
38
+ processedText = processedText.replace(regex, '__?__');
39
+ });
40
+
41
+ return processedText;
42
+ };
frontend/src/components/TextWithTooltips.jsx ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ # Copyright 2025 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ */
16
+
17
+ import React from 'react';
18
+ import {medicalTerms} from '../data/medicalTerms';
19
+
20
+ const TextWithTooltips = ({text}) => {
21
+ const sortedKeys = Object.keys(medicalTerms).sort((a, b) => b.length - a.length);
22
+ const regex = new RegExp(`(${sortedKeys.join('|')})`, 'gi');
23
+ const parts = text.split(regex);
24
+
25
+ return (
26
+ <>
27
+ {parts.map((part, index) => {
28
+ const lowerCasePart = part.toLowerCase();
29
+ if (medicalTerms[lowerCasePart]) {
30
+ return (
31
+ <span
32
+ key={index}
33
+ className="tooltip-trigger"
34
+ data-tooltip-content={medicalTerms[lowerCasePart]}
35
+ style={{borderBottom: '1px dotted'}}
36
+ >
37
+ {part}
38
+ </span>
39
+ );
40
+ }
41
+ return part;
42
+ })}
43
+ </>
44
+ );
45
+ };
46
+
47
+ export default TextWithTooltips;
frontend/src/data/constants.js ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ # Copyright 2025 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ */
16
+
17
+ export const CXR_14_CITATION = `The images are from the NIH Chest X-Ray dataset, which is available for download at: https://nihcc.app.box.com/v/ChestXray-NIHCC. The dataset was provided by the NIH Clinical Center. The following paper provides a detailed description of the dataset:
18
+
19
+ Xiaosong Wang, Yifan Peng, Le Lu, Zhiyong Lu, Mohammadhadi Bagheri, Ronald Summers, ChestX-ray8: Hospital-scale Chest X-ray Database and Benchmarks on Weakly-Supervised Classification and Localization of Common Thorax Diseases, IEEE CVPR, pp. 3462-3471, 2017.`
20
+
21
+ export const CONDITION_TERMS = ["Pleural Effusion", "Cardiomegaly",
22
+ "Cardiomegally", "Atelectasis", "Pneumonia", "Pneumothorax", "Edema",
23
+ "Emphysema", "Fibrosis", "Pleural Thickening", "Hernia"]
frontend/src/data/medicalTerms.js ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ # Copyright 2025 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ */
16
+
17
+ export const medicalTerms = {
18
+ "pleural line": "A very thin, fine white line on an X-Ray that shows the actual edge of the lung, often seen when the lung is separated from the chest wall due to underlying abnormality.",
19
+ "effusion": "A general term for fluid buildup. On a Chest X-Ray, this fluid appears white and often settles in the lowest parts of the chest due to gravity.",
20
+ "costophrenic angle": "The sharp, pointed corner at the very bottom of each lung seen on an X-Ray, located where your diaphragm (breathing muscle) meets your ribs.",
21
+ "mediastinum": "The central area in the chest that sits between the lungs. On an X-Ray, this column contains the heart, windpipe, and major blood vessels.",
22
+ "mediastinal": "An adjective describing anything located within the mediastinum, which is the central compartment of the chest that separates the right and left lungs.",
23
+ "hemithorax": "On a Chest X-Ray (CXR), hemithorax refers to either the right or left half of the chest cavity, encompassing the lung and surrounding structures.",
24
+ "catheter": "A thin, flexible tube that shows up as a distinct line on an X-Ray. It is placed in the body to deliver fluids or for monitoring.",
25
+ "meniscus sign": "A specific crescent or U-shape that fluid creates on a Chest X-Ray as it appears to climb up the side of the lung, like water in a glass.",
26
+ "meniscus": "A curved, half moon shape that appears on an X-Ray generally indicating underlying fluid collection or a mass.",
27
+ "lung apex": "The very top, rounded part of the lung. On a Chest X-Ray, this is the portion of the lung that is visible above the collarbone.",
28
+ "pleural space": "The potential thin gap between the outer surface of the lung and the inner chest wall. This space is visible on an X-Ray only when it’s abnormal.",
29
+ "cardiac": "An adjective meaning 'related to the heart.' In X-Rays, it's used to describe the size, shape, and outline of the heart's shadow.",
30
+ "thoracic": "An adjective meaning 'related to the chest.' It refers to all structures within the chest cavity, including the ribs, heart, and lungs shown on the X-ray.",
31
+ "radiopaque": "Describes substances that block X-Rays and therefore appear white or light on an image. Example - Bone, metal, and contrast dyes are all radiopaque."
32
+ };
frontend/src/icons/IconArticlePerson.jsx ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ # Copyright 2025 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ */
16
+
17
+ import React from 'react';
18
+
19
+ const IconArticlePerson = ({className, ...props}) => {
20
+ return (
21
+ <svg
22
+ width="20"
23
+ height="20"
24
+ viewBox="14 16 17 17"
25
+ fill="currentColor"
26
+ xmlns="http://www.w3.org/2000/svg"
27
+ className={className}
28
+ {...props}
29
+ preserveAspectRatio="xMidYMid meet"
30
+ >
31
+ <path
32
+ d="M18 21.5H26V20H18V21.5ZM16.5 31C16.0833 31 15.7292 30.8542 15.4375 30.5625C15.1458 30.2708 15 29.9167 15 29.5V18.5C15 18.0833 15.1458 17.7292 15.4375 17.4375C15.7292 17.1458 16.0833 17 16.5 17H27.5C27.9167 17 28.2708 17.1458 28.5625 17.4375C28.8542 17.7292 29 18.0833 29 18.5V22.5417C28.6667 22.2083 28.2847 21.9514 27.8542 21.7708C27.4236 21.5903 26.9722 21.5 26.5 21.5C25.875 21.5 25.2917 21.6597 24.75 21.9792C24.2083 22.2847 23.7778 22.7083 23.4583 23.25H18V24.75H23C22.9722 25.0556 22.9861 25.3542 23.0417 25.6458C23.1111 25.9375 23.2083 26.2222 23.3333 26.5H18V28H22.3333C22.0694 28.25 21.8611 28.5417 21.7083 28.875C21.5694 29.1944 21.5 29.5347 21.5 29.8958V31H16.5ZM23 31V29.8958C23 29.7431 23.0278 29.6042 23.0833 29.4792C23.1389 29.3403 23.2222 29.2222 23.3333 29.125C23.7778 28.75 24.2708 28.4722 24.8125 28.2917C25.3542 28.0972 25.9167 28 26.5 28C27.0833 28 27.6458 28.0972 28.1875 28.2917C28.7292 28.4722 29.2222 28.75 29.6667 29.125C29.7778 29.2222 29.8611 29.3403 29.9167 29.4792C29.9722 29.6042 30 29.7431 30 29.8958V31H23ZM26.5 27C25.9444 27 25.4722 26.8056 25.0833 26.4167C24.6944 26.0278 24.5 25.5556 24.5 25C24.5 24.4444 24.6944 23.9722 25.0833 23.5833C25.4722 23.1944 25.9444 23 26.5 23C27.0556 23 27.5278 23.1944 27.9167 23.5833C28.3056 23.9722 28.5 24.4444 28.5 25C28.5 25.5556 28.3056 26.0278 27.9167 26.4167C27.5278 26.8056 27.0556 27 26.5 27Z"
33
+ />
34
+ </svg>
35
+ );
36
+ };
37
+
38
+ export default IconArticlePerson;
frontend/src/icons/IconAstrophotography.jsx ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ # Copyright 2025 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ */
16
+
17
+
18
+ import React from 'react';
19
+
20
+ const IconAstrophotography = ({className, ...props}) => {
21
+ return (
22
+ <svg width="24" height="24" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg"
23
+ className={className} {...props}>
24
+ <path
25
+ d="M19 9L17.75 6.25L15 5L17.75 3.75L19 0.999999L20.25 3.75L23 5L20.25 6.25L19 9ZM19 23L17.75 20.25L15 19L17.75 17.75L19 15L20.25 17.75L23 19L20.25 20.25L19 23ZM10 20L7.5 14.5L2 12L7.5 9.5L10 4L12.5 9.5L18 12L12.5 14.5L10 20ZM10 15.15L11 13L13.15 12L11 11L10 8.85L9 11L6.85 12L9 13L10 15.15Z"
26
+ fill="#0B57D0"/>
27
+ </svg>
28
+ );
29
+ };
30
+
31
+ export default IconAstrophotography;
frontend/src/icons/IconBackArrow.jsx ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ # Copyright 2025 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ */
16
+
17
+ import React from 'react';
18
+
19
+ const IconBackArrow = ({className, ...props}) => {
20
+ return (
21
+ <svg
22
+ width="20"
23
+ height="20"
24
+ viewBox="0 0 24 24"
25
+ fill="currentColor"
26
+ xmlns="http://www.w3.org/2000/svg"
27
+ className={className}
28
+ {...props}
29
+ >
30
+ <path d="M15.41 7.41L14 6L8 12L14 18L15.41 16.59L10.83 12L15.41 7.41Z"/>
31
+ </svg>
32
+ );
33
+ };
34
+
35
+ export default IconBackArrow;
frontend/src/icons/IconClose.jsx ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ # Copyright 2025 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ */
16
+
17
+
18
+ import React from 'react';
19
+
20
+ const IconClose = ({className}) => (
21
+ <svg className={className} width="24" height="24" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg">
22
+ <path
23
+ d="M19 6.41L17.59 5L12 10.59L6.41 5L5 6.41L10.59 12L5 17.59L6.41 19L12 13.41L17.59 19L19 17.59L13.41 12L19 6.41Z"
24
+ fill="currentColor"/>
25
+ </svg>
26
+ );
27
+
28
+ export default IconClose;
frontend/src/icons/IconCodeBlocks.jsx ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ # Copyright 2025 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ */
16
+
17
+
18
+ import React from 'react';
19
+
20
+ const IconCodeBlocks = ({className, ...props}) => {
21
+ return (
22
+ <svg
23
+ width="20"
24
+ height="20"
25
+ viewBox="0 0 20 20"
26
+ fill="currentColor"
27
+ xmlns="http://www.w3.org/2000/svg"
28
+ className={className}
29
+ {...props}
30
+ >
31
+ <path
32
+ d="M8 12.5L9.0625 11.4375L7.625 10L9.0625 8.5625L8 7.5L5.5 10L8 12.5ZM12 12.5L14.5 10L12 7.5L10.9375 8.5625L12.375 10L10.9375 11.4375L12 12.5ZM4.5 17C4.08333 17 3.72917 16.8542 3.4375 16.5625C3.14583 16.2708 3 15.9167 3 15.5V4.5C3 4.08333 3.14583 3.72917 3.4375 3.4375C3.72917 3.14583 4.08333 3 4.5 3H15.5C15.9167 3 16.2708 3.14583 16.5625 3.4375C16.8542 3.72917 17 4.08333 17 4.5V15.5C17 15.9167 16.8542 16.2708 16.5625 16.5625C16.2708 16.8542 15.9167 17 15.5 17H4.5Z"/>
33
+ </svg>
34
+ );
35
+ };
36
+
37
+ export default IconCodeBlocks;
frontend/src/icons/IconGemma.jsx ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ # Copyright 2025 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ */
16
+
17
+
18
+ import React from 'react';
19
+
20
// Decorative "Gemma" wordmark rendered as an inline SVG. The lettering is a
// single pre-traced <path> (presumably exported from a design tool), filled
// with a linear gradient and clipped by a luminance mask.
//
// `aria-hidden="true"` keeps the graphic out of the accessibility tree — it
// is purely visual. Size/position it from the caller via `className`.
const IconGemma = ({className}) => (
  <svg
    width="107"
    height="25"
    viewBox="60 0 110 25"
    fill="none"
    xmlns="http://www.w3.org/2000/svg"
    className={className}
    aria-hidden="true"
  >
    {/* Luminance mask restricting the artwork to a 208x25 strip. */}
    <mask id="mask0_7159_1111" style={{maskType: 'luminance'}} maskUnits="userSpaceOnUse" x="-14" y="0" width="208"
          height="25">
      <path d="M193.588 0H-13.2987V24.6199H193.588V0Z" fill="white"/>
    </mask>
    <g mask="url(#mask0_7159_1111)">
      {/* NOTE(review): the path data below was re-joined from display-wrapped
          diff output — verify it byte-for-byte against the committed file. */}
      <path
        d="M72.5851 23.7369C70.9769 23.7369 69.452 23.4391 68.0106 22.8474C66.5929 22.2558 65.3421 21.4179 64.262 20.3378C63.1819 19.2577 62.3361 17.9989 61.7206 16.5575C61.1051 15.116 60.7993 13.5514 60.7993 11.8558C60.7993 10.1602 61.1051 8.59568 61.7206 7.15422C62.3361 5.71276 63.1819 4.45397 64.262 3.37387C65.3421 2.29377 66.5929 1.4559 68.0106 0.864226C69.452 0.272553 70.9769 -0.0252686 72.5851 -0.0252686C74.1934 -0.0252686 75.8254 0.272553 77.2867 0.864226C78.7679 1.4559 79.987 2.29377 80.94 3.37387L79.034 5.27993C78.5455 4.68826 77.9658 4.18792 77.2867 3.78685C76.6077 3.38578 75.877 3.07605 75.0948 2.86559C74.3125 2.65513 73.4865 2.54791 72.6169 2.54791C71.4097 2.54791 70.2542 2.77029 69.1542 3.21503C68.0543 3.63993 67.0774 4.26337 66.2316 5.08932C65.4056 5.89543 64.7504 6.86831 64.262 8.01195C63.7736 9.15559 63.5313 10.4382 63.5313 11.8558C63.5313 13.2735 63.7736 14.5561 64.262 15.6997C64.7703 16.8434 65.4493 17.8282 66.2951 18.6541C67.1409 19.4602 68.1059 20.0837 69.186 20.5284C70.2859 20.9533 71.4296 21.1638 72.6169 21.1638C73.6334 21.1638 74.6182 21.0248 75.5713 20.7508C76.5442 20.453 77.4138 20.0082 78.1762 19.4165C78.9585 18.801 79.6058 18.0188 80.1141 17.0657C80.6223 16.1127 80.9281 14.9691 81.0353 13.6348H72.6487V11.1252H83.5767C83.6204 11.3992 83.6522 11.6652 83.672 11.9194C83.7157 12.1735 83.7356 12.4475 83.7356 12.7453V12.7771C83.7356 14.4092 83.4616 15.9022 82.9096 17.2563C82.3576 18.5906 81.5873 19.7461 80.5906 20.719C79.5939 21.672 78.4185 22.4146 77.0644 22.9427C75.7103 23.4709 74.2172 23.7369 72.5851 23.7369ZM92.9005 23.7369C91.332 23.7369 89.9342 23.3676 88.7072 22.6251C87.5 21.8825 86.547 20.8659 85.8481 19.5754C85.1691 18.2848 84.8316 16.8116 84.8316 15.1597C84.8316 13.615 85.1492 12.1854 85.7846 10.871C86.4398 9.55665 87.3531 8.50832 88.5166 7.72604C89.7039 6.91994 91.0898 6.51887 92.6782 6.51887C94.2665 6.51887 95.6643 6.88023 96.808 7.59897C97.9714 8.29786 98.8609 9.27074 99.4764 10.5216C100.112 11.7724 100.429 13.202 100.429 14.9572C100.418 15.1081 100.398 15.255 100.398 15.4019C100.398 15.4019 100.386 15.529 100.366 15.6362H87.5318C87.5953 16.7997 87.8614 17.7845 88.326 18.5906C88.8541 19.4999 89.5332 20.179 90.3591 20.6237C91.2049 21.0685 92.0865 21.2908 92.9958 21.2908C94.1832 21.2908 95.156 21.0168 95.9185 20.4649C96.7007 19.8931 97.3282 19.1942 97.7928 18.3682L100.08 19.4801C99.4447 20.7071 98.5353 21.7237 97.348 22.5298C96.1607 23.3359 94.6795 23.7369 92.9005 23.7369ZM87.6906 13.4125H97.5704C97.5505 12.9677 97.4433 12.4912 97.2527 11.9829C97.082 11.4548 96.796 10.9663 96.395 10.5216C96.0138 10.057 95.5174 9.68372 94.9019 9.40973C94.3102 9.1119 93.5677 8.96498 92.6782 8.96498C91.6179 8.96498 90.6966 9.23897 89.9144 9.79094C89.1519 10.3191 88.5682 11.0497 88.1672 11.9829C87.9567 12.4277 87.7979 12.9042 87.6906 13.4125ZM102.002 23.2287V7.02715H104.575V9.40973H104.702C105 8.88159 105.401 8.40507 105.909 7.98018C106.438 7.53543 107.033 7.18599 107.688 6.93185C108.367 6.65785 109.054 6.51887 109.753 6.51887C110.96 6.51887 111.997 6.81669 112.867 7.40836C113.736 7.98018 114.36 8.73069 114.741 9.66387C115.293 8.75452 116.031 8.00004 116.965 7.40836C117.898 6.81669 119.01 6.51887 120.3 6.51887C122.226 6.51887 123.648 7.1026 124.557 8.26609C125.466 9.42958 125.923 10.9544 125.923 12.8406V23.2287H123.255V13.2536C123.255 11.6851 122.937 10.5851 122.301 9.94978C121.666 9.29457 120.765 8.96498 119.601 8.96498C118.775 8.96498 118.033 9.20721 117.378 9.69563C116.742 10.1602 116.234 10.7876 115.853 11.5699C115.491 12.3324 115.313 13.1702 115.313 14.0796V23.2287H112.612V13.2854C112.612 11.7169 112.295 10.605 111.659 9.94978C111.024 9.29457 110.134 8.96498 108.991 8.96498C108.165 8.96498 107.422 9.20721 106.767 9.69563C106.132 10.1841 105.623 10.8194 105.242 11.6017C104.881 12.384 104.702 13.2338 104.702 14.1431V23.2287H102.002ZM128.21 23.2287V7.02715H130.783V9.40973H130.911C131.208 8.88159 131.609 8.40507 132.118 7.98018C132.646 7.53543 133.241 7.18599 133.897 6.93185C134.576 6.65785 135.263 6.51887 135.962 6.51887C137.169 6.51887 138.205 6.81669 139.075 7.40836C139.944 7.98018 140.568 8.73069 140.949 9.66387C141.501 8.75452 142.24 8.00004 143.173 7.40836C144.106 6.81669 145.218 6.51887 146.508 6.51887C148.434 6.51887 149.856 7.1026 150.765 8.26609C151.675 9.42958 152.131 10.9544 152.131 12.8406V23.2287H149.463V13.2536C149.463 11.6851 149.145 10.5851 148.51 9.94978C147.874 9.29457 146.973 8.96498 145.81 8.96498C144.984 8.96498 144.241 9.20721 143.586 9.69563C142.95 10.1602 142.442 10.7876 142.061 11.5699C141.7 12.3324 141.521 13.1702 141.521 14.0796V23.2287H138.821V13.2854C138.821 11.7169 138.503 10.605 137.868 9.94978C137.232 9.29457 136.343 8.96498 135.199 8.96498C134.373 8.96498 133.631 9.20721 132.975 9.69563C132.34 10.1841 131.832 10.8194 131.451 11.6017C131.089 12.384 130.911 13.2338 130.911 14.1431V23.2287H128.21ZM159.442 23.7369C158.235 23.7369 157.174 23.5027 156.265 23.0381C155.356 22.5735 154.633 21.9381 154.105 21.132C153.597 20.306 153.342 19.3729 153.342 18.3364C153.342 17.1491 153.648 16.1564 154.264 15.3503C154.879 14.5243 155.705 13.9088 156.742 13.5078C157.778 13.0829 158.922 12.8724 160.173 12.8724C160.891 12.8724 161.558 12.9359 162.174 13.063C162.789 13.1702 163.318 13.3052 163.762 13.476C164.227 13.6229 164.576 13.7738 164.811 13.9207V12.9359C164.811 11.7089 164.378 10.7321 163.508 10.0133C162.638 9.29457 161.582 8.93321 160.331 8.93321C159.442 8.93321 158.604 9.13573 157.822 9.5368C157.059 9.91801 156.456 10.4581 156.011 11.1569L153.978 9.6321C154.403 8.99675 154.931 8.44478 155.566 7.98018C156.202 7.51558 156.92 7.15422 157.726 6.90008C158.552 6.64594 159.422 6.51887 160.331 6.51887C162.535 6.51887 164.259 7.1026 165.509 8.26609C166.76 9.42958 167.384 10.9981 167.384 12.9677V23.2287H164.811V20.9096H164.684C164.41 21.3742 164.016 21.8309 163.508 22.2756C163 22.7005 162.396 23.05 161.697 23.324C161.018 23.598 160.268 23.7369 159.442 23.7369ZM159.696 21.3544C160.629 21.3544 161.475 21.1201 162.237 20.6555C163.02 20.1909 163.647 19.5635 164.112 18.7812C164.576 17.9989 164.811 17.1412 164.811 16.208C164.322 15.8705 163.711 15.5925 162.968 15.382C162.249 15.1716 161.455 15.0644 160.585 15.0644C159.041 15.0644 157.905 15.382 157.186 16.0174C156.468 16.6528 156.106 17.435 156.106 18.3682C156.106 19.3014 156.444 19.9764 157.123 20.5284C157.802 21.0804 158.66 21.3544 159.696 21.3544Z"
        fill="url(#paint0_linear_7159_1111)"/>
    </g>
    <defs>
      {/* Blue linear gradient that tints the lettering. */}
      <linearGradient id="paint0_linear_7159_1111" x1="112.14" y1="29.0382" x2="116.119" y2="4.52148"
                      gradientUnits="userSpaceOnUse">
        <stop stopColor="#3B6BFF"/>
        <stop offset="0.59" stopColor="#2E96FF"/>
        <stop offset="1" stopColor="#ACB7FF"/>
      </linearGradient>
    </defs>
  </svg>
);

export default IconGemma;