{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Sybil - Lung Cancer Risk Prediction\n",
    "\n",
    "This notebook demonstrates how to use the Sybil model from Hugging Face for lung cancer risk prediction."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Install Requirements"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
    "# matplotlib and numpy are imported by the visualization section further down.\n",
    "%pip install huggingface-hub torch torchvision pydicom sybil requests matplotlib numpy"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Load Model from Hugging Face"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from huggingface_hub import snapshot_download\n",
    "import sys\n",
    "\n",
    "# Download model\n",
    "print(\"Downloading Sybil model from Hugging Face...\")\n",
    "model_path = snapshot_download(repo_id=\"Lab-Rasool/sybil\")\n",
    "\n",
    "# The modules imported below are resolved through this sys.path entry.\n",
    "# The membership check keeps re-runs of this cell from stacking duplicate\n",
    "# entries onto sys.path.\n",
    "# NOTE(review): this imports and executes code fetched from the Hub; consider\n",
    "# passing revision=<commit hash> to snapshot_download to pin what gets run.\n",
    "if model_path not in sys.path:\n",
    "    sys.path.append(model_path)\n",
    "\n",
    "# Import model\n",
    "from modeling_sybil_wrapper import SybilHFWrapper\n",
    "from configuration_sybil import SybilConfig\n",
    "\n",
    "# Initialize\n",
    "config = SybilConfig()\n",
    "model = SybilHFWrapper(config)\n",
    "print(\"✅ Model loaded successfully!\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. 
Download Demo Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "import zipfile\n",
    "from io import BytesIO\n",
    "import os\n",
    "\n",
    "DEMO_DATA_URL = \"https://www.dropbox.com/scl/fi/covbvo6f547kak4em3cjd/sybil_example.zip?rlkey=7a13nhlc9uwga9x7pmtk1cf1c&dl=1\"\n",
    "\n",
    "\n",
    "def _find_dicom_files(search_dir):\n",
    "    \"\"\"Return the sorted paths of every .dcm file found beneath search_dir.\"\"\"\n",
    "    return sorted(\n",
    "        os.path.join(root, name)\n",
    "        for root, _dirs, names in os.walk(search_dir)\n",
    "        for name in names\n",
    "        if name.lower().endswith('.dcm')\n",
    "    )\n",
    "\n",
    "\n",
    "def get_demo_data(timeout=60):\n",
    "    \"\"\"Return sorted paths of the demo DICOM slices, downloading them if needed.\n",
    "\n",
    "    The archive is unpacked into ~/.sybil_demo; later calls reuse whatever\n",
    "    .dcm files are already there instead of downloading again.\n",
    "\n",
    "    Args:\n",
    "        timeout: Seconds to wait on the download server before giving up.\n",
    "\n",
    "    Returns:\n",
    "        Sorted list of paths to the .dcm files under the cache directory.\n",
    "\n",
    "    Raises:\n",
    "        requests.HTTPError: If the server answers with an error status.\n",
    "        RuntimeError: If no .dcm files exist after extraction.\n",
    "    \"\"\"\n",
    "    cache_dir = os.path.expanduser(\"~/.sybil_demo\")\n",
    "\n",
    "    # Decide on the cached files themselves rather than on a guessed folder\n",
    "    # name, so the cache works whatever top-level directory the archive uses.\n",
    "    dicom_files = _find_dicom_files(cache_dir)\n",
    "\n",
    "    if not dicom_files:\n",
    "        print(\"Downloading demo DICOM files...\")\n",
    "        response = requests.get(DEMO_DATA_URL, timeout=timeout)\n",
    "        # Fail here with the HTTP status instead of later with BadZipFile\n",
    "        # when the server sends an error page in place of the archive.\n",
    "        response.raise_for_status()\n",
    "\n",
    "        os.makedirs(cache_dir, exist_ok=True)\n",
    "        with zipfile.ZipFile(BytesIO(response.content)) as zf:\n",
    "            zf.extractall(cache_dir)\n",
    "\n",
    "        dicom_files = _find_dicom_files(cache_dir)\n",
    "        if not dicom_files:\n",
    "            raise RuntimeError(f\"No .dcm files found under {cache_dir}\")\n",
    "\n",
    "    print(f\"Found {len(dicom_files)} DICOM files\")\n",
    "    return dicom_files\n",
    "\n",
    "# Get demo data\n",
    "dicom_files = get_demo_data()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Run Prediction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run prediction\n",
    "print(\"Running lung cancer risk prediction...\")\n",
    "output = model(dicom_paths=dicom_files)\n",
    "risk_scores = output.risk_scores.numpy()\n",
    "\n",
    "# Display results\n",
    "print(\"\\n\" + \"=\"*40)\n",
    "print(\"Lung Cancer Risk Predictions\")\n",
    "print(\"=\"*40)\n",
    "\n",
    "BAR_WIDTH = 30         # total characters in each text bar\n",
    "CHARS_PER_PERCENT = 2  # scale for visualization: the bar is full at 15% risk\n",
    "\n",
    "for i, score in enumerate(risk_scores):\n",
    "    risk_pct = score * 100\n",
    "    # Clamp so a risk above 15% cannot push the bar past its fixed width.\n",
    "    bar_length = max(0, min(BAR_WIDTH, int(risk_pct * CHARS_PER_PERCENT)))\n",
    "    bar = '█' * bar_length + '░' * (BAR_WIDTH - bar_length)\n",
    "    print(f\"Year {i+1}: {bar} {risk_pct:.1f}%\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5. 
Visualize Risk Progression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "\n",
    "# Create visualization\n",
    "# The x axis is derived from the scores themselves so the number of bars\n",
    "# always matches the number of predictions.\n",
    "risk_percentages = risk_scores * 100\n",
    "years = np.arange(1, len(risk_percentages) + 1)\n",
    "\n",
    "# One color per bar; the palette repeats if there are more bars than colors.\n",
    "palette = ['green', 'green', 'yellow', 'yellow', 'orange', 'orange']\n",
    "bar_colors = [palette[k % len(palette)] for k in range(len(years))]\n",
    "\n",
    "fig, ax = plt.subplots(figsize=(10, 6))\n",
    "ax.bar(years, risk_percentages, color=bar_colors)\n",
    "ax.set_xlabel('Years from Scan', fontsize=12)\n",
    "ax.set_ylabel('Lung Cancer Risk (%)', fontsize=12)\n",
    "ax.set_title('Predicted Lung Cancer Risk Over Time', fontsize=14, fontweight='bold')\n",
    "ax.grid(axis='y', alpha=0.3)\n",
    "\n",
    "# Add value labels on bars\n",
    "for year, risk in zip(years, risk_percentages):\n",
    "    ax.text(year, risk + 0.5, f'{risk:.1f}%', ha='center', fontweight='bold')\n",
    "\n",
    "fig.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6. Using Your Own Data\n",
    "\n",
    "To use your own CT scan data, replace the demo data with your DICOM file paths:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Example with your own data (uncomment and modify)\n",
    "# my_dicom_files = [\n",
    "#     \"/path/to/your/scan/slice001.dcm\",\n",
    "#     \"/path/to/your/scan/slice002.dcm\",\n",
    "#     # ... add all slices\n",
    "# ]\n",
    "# \n",
    "# output = model(dicom_paths=my_dicom_files)\n",
    "# my_risk_scores = output.risk_scores.numpy()\n",
    "# \n",
    "# for i, score in enumerate(my_risk_scores):\n",
    "#     print(f\"Year {i+1}: {score*100:.1f}% risk\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Important Notes\n",
    "\n",
    "⚠️ **Medical Disclaimer**: This model is for research and educational purposes. Always consult qualified healthcare professionals for medical decisions.\n",
    "\n",
    "📚 **Citation**: If you use this model in research, please cite:\n",
    "```\n",
    "Mikhael, P.G., Wohlwend, J., Yala, A. 
et al. (2023).\n", "Sybil: A validated deep learning model to predict future lung cancer risk\n", "from a single low-dose chest computed tomography.\n", "Journal of Clinical Oncology, 41(12), 2191-2200.\n", "```" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.0" } }, "nbformat": 4, "nbformat_minor": 4 }