Upload test_sybil.ipynb
Browse files- test_sybil.ipynb +237 -0
test_sybil.ipynb
ADDED
@@ -0,0 +1,237 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "markdown",
|
5 |
+
"metadata": {},
|
6 |
+
"source": [
|
7 |
+
"# Sybil - Lung Cancer Risk Prediction\\n",
|
8 |
+
"\\n",
|
9 |
+
"This notebook demonstrates how to use the Sybil model from Hugging Face for lung cancer risk prediction."
|
10 |
+
]
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"cell_type": "markdown",
|
14 |
+
"metadata": {},
|
15 |
+
"source": [
|
16 |
+
"## 1. Install Requirements"
|
17 |
+
]
|
18 |
+
},
|
19 |
+
{
|
20 |
+
"cell_type": "code",
|
21 |
+
"execution_count": null,
|
22 |
+
"metadata": {},
|
23 |
+
"outputs": [],
|
24 |
+
"source": [
|
25 |
+
"!pip install huggingface-hub torch torchvision pydicom sybil requests matplotlib numpy"
|
26 |
+
]
|
27 |
+
},
|
28 |
+
{
|
29 |
+
"cell_type": "markdown",
|
30 |
+
"metadata": {},
|
31 |
+
"source": [
|
32 |
+
"## 2. Load Model from Hugging Face"
|
33 |
+
]
|
34 |
+
},
|
35 |
+
{
|
36 |
+
"cell_type": "code",
|
37 |
+
"execution_count": null,
|
38 |
+
"metadata": {},
|
39 |
+
"outputs": [],
|
40 |
+
"source": [
|
41 |
+
"from huggingface_hub import snapshot_download\\n",
|
42 |
+
"import sys\\n",
|
43 |
+
"\\n",
|
44 |
+
"# Download model\\n",
|
45 |
+
"print(\"Downloading Sybil model from Hugging Face...\")\\n",
|
46 |
+
"model_path = snapshot_download(repo_id=\"Lab-Rasool/sybil\")\\n",
|
47 |
+
"sys.path.append(model_path)\\n",
|
48 |
+
"\\n",
|
49 |
+
"# Import model\\n",
|
50 |
+
"from modeling_sybil_wrapper import SybilHFWrapper\\n",
|
51 |
+
"from configuration_sybil import SybilConfig\\n",
|
52 |
+
"\\n",
|
53 |
+
"# Initialize\\n",
|
54 |
+
"config = SybilConfig()\\n",
|
55 |
+
"model = SybilHFWrapper(config)\\n",
|
56 |
+
"print(\"✅ Model loaded successfully!\")"
|
57 |
+
]
|
58 |
+
},
|
59 |
+
{
|
60 |
+
"cell_type": "markdown",
|
61 |
+
"metadata": {},
|
62 |
+
"source": [
|
63 |
+
"## 3. Download Demo Data"
|
64 |
+
]
|
65 |
+
},
|
66 |
+
{
|
67 |
+
"cell_type": "code",
|
68 |
+
"execution_count": null,
|
69 |
+
"metadata": {},
|
70 |
+
"outputs": [],
|
71 |
+
"source": [
|
72 |
+
"import requests\\n",
|
73 |
+
"import zipfile\\n",
|
74 |
+
"from io import BytesIO\\n",
|
75 |
+
"import os\\n",
|
76 |
+
"\\n",
|
77 |
+
"def get_demo_data():\\n",
|
78 |
+
" cache_dir = os.path.expanduser(\"~/.sybil_demo\")\\n",
|
79 |
+
" demo_dir = os.path.join(cache_dir, \"sybil_demo_data\")\\n",
|
80 |
+
" \\n",
|
81 |
+
" if not os.path.exists(demo_dir):\\n",
|
82 |
+
" print(\"Downloading demo DICOM files...\")\\n",
|
83 |
+
" url = \"https://www.dropbox.com/scl/fi/covbvo6f547kak4em3cjd/sybil_example.zip?rlkey=7a13nhlc9uwga9x7pmtk1cf1c&dl=1\"\\n",
|
84 |
+
" response = requests.get(url)\\n",
|
85 |
+
" \\n",
|
86 |
+
" os.makedirs(cache_dir, exist_ok=True)\\n",
|
87 |
+
" with zipfile.ZipFile(BytesIO(response.content)) as zf:\\n",
|
88 |
+
" zf.extractall(cache_dir)\\n",
|
89 |
+
" \\n",
|
90 |
+
" # Find DICOM files\\n",
|
91 |
+
" dicom_files = []\\n",
|
92 |
+
" for root, dirs, files in os.walk(cache_dir):\\n",
|
93 |
+
" for file in files:\\n",
|
94 |
+
" if file.endswith('.dcm'):\\n",
|
95 |
+
" dicom_files.append(os.path.join(root, file))\\n",
|
96 |
+
" \\n",
|
97 |
+
" print(f\"Found {len(dicom_files)} DICOM files\")\\n",
|
98 |
+
" return sorted(dicom_files)\\n",
|
99 |
+
"\\n",
|
100 |
+
"# Get demo data\\n",
|
101 |
+
"dicom_files = get_demo_data()"
|
102 |
+
]
|
103 |
+
},
|
104 |
+
{
|
105 |
+
"cell_type": "markdown",
|
106 |
+
"metadata": {},
|
107 |
+
"source": [
|
108 |
+
"## 4. Run Prediction"
|
109 |
+
]
|
110 |
+
},
|
111 |
+
{
|
112 |
+
"cell_type": "code",
|
113 |
+
"execution_count": null,
|
114 |
+
"metadata": {},
|
115 |
+
"outputs": [],
|
116 |
+
"source": [
|
117 |
+
"# Run prediction\\n",
|
118 |
+
"print(\"Running lung cancer risk prediction...\")\\n",
|
119 |
+
"output = model(dicom_paths=dicom_files)\\n",
|
120 |
+
"risk_scores = output.risk_scores.numpy()\\n",
|
121 |
+
"\\n",
|
122 |
+
"# Display results\\n",
|
123 |
+
"print(\"\\n\" + \"=\"*40)\\n",
|
124 |
+
"print(\"Lung Cancer Risk Predictions\")\\n",
|
125 |
+
"print(\"=\"*40)\\n",
|
126 |
+
"\\n",
|
127 |
+
"for i, score in enumerate(risk_scores):\\n",
|
128 |
+
" risk_pct = score * 100\\n",
|
129 |
+
" bar_length = int(risk_pct * 2) # Scale for visualization\\n",
|
130 |
+
" bar = '█' * bar_length + '░' * (30 - bar_length)\\n",
|
131 |
+
" print(f\"Year {i+1}: {bar} {risk_pct:.1f}%\")"
|
132 |
+
]
|
133 |
+
},
|
134 |
+
{
|
135 |
+
"cell_type": "markdown",
|
136 |
+
"metadata": {},
|
137 |
+
"source": [
|
138 |
+
"## 5. Visualize Risk Progression"
|
139 |
+
]
|
140 |
+
},
|
141 |
+
{
|
142 |
+
"cell_type": "code",
|
143 |
+
"execution_count": null,
|
144 |
+
"metadata": {},
|
145 |
+
"outputs": [],
|
146 |
+
"source": [
|
147 |
+
"import matplotlib.pyplot as plt\\n",
|
148 |
+
"import numpy as np\\n",
|
149 |
+
"\\n",
|
150 |
+
"# Create visualization\\n",
|
151 |
+
"years = np.arange(1, 7)\\n",
|
152 |
+
"risk_percentages = risk_scores * 100\\n",
|
153 |
+
"\\n",
|
154 |
+
"plt.figure(figsize=(10, 6))\\n",
|
155 |
+
"plt.bar(years, risk_percentages, color=['green', 'green', 'yellow', 'yellow', 'orange', 'orange'])\\n",
|
156 |
+
"plt.xlabel('Years from Scan', fontsize=12)\\n",
|
157 |
+
"plt.ylabel('Lung Cancer Risk (%)', fontsize=12)\\n",
|
158 |
+
"plt.title('Predicted Lung Cancer Risk Over Time', fontsize=14, fontweight='bold')\\n",
|
159 |
+
"plt.grid(axis='y', alpha=0.3)\\n",
|
160 |
+
"\\n",
|
161 |
+
"# Add value labels on bars\\n",
|
162 |
+
"for i, (year, risk) in enumerate(zip(years, risk_percentages)):\\n",
|
163 |
+
" plt.text(year, risk + 0.5, f'{risk:.1f}%', ha='center', fontweight='bold')\\n",
|
164 |
+
"\\n",
|
165 |
+
"plt.tight_layout()\\n",
|
166 |
+
"plt.show()"
|
167 |
+
]
|
168 |
+
},
|
169 |
+
{
|
170 |
+
"cell_type": "markdown",
|
171 |
+
"metadata": {},
|
172 |
+
"source": [
|
173 |
+
"## 6. Using Your Own Data\\n",
|
174 |
+
"\\n",
|
175 |
+
"To use your own CT scan data, replace the demo data with your DICOM file paths:"
|
176 |
+
]
|
177 |
+
},
|
178 |
+
{
|
179 |
+
"cell_type": "code",
|
180 |
+
"execution_count": null,
|
181 |
+
"metadata": {},
|
182 |
+
"outputs": [],
|
183 |
+
"source": [
|
184 |
+
"# Example with your own data (uncomment and modify)\\n",
|
185 |
+
"# my_dicom_files = [\\n",
|
186 |
+
"# \"/path/to/your/scan/slice001.dcm\",\\n",
|
187 |
+
"# \"/path/to/your/scan/slice002.dcm\",\\n",
|
188 |
+
"# # ... add all slices\\n",
|
189 |
+
"# ]\\n",
|
190 |
+
"# \\n",
|
191 |
+
"# output = model(dicom_paths=my_dicom_files)\\n",
|
192 |
+
"# my_risk_scores = output.risk_scores.numpy()\\n",
|
193 |
+
"# \\n",
|
194 |
+
"# for i, score in enumerate(my_risk_scores):\\n",
|
195 |
+
"# print(f\"Year {i+1}: {score*100:.1f}% risk\")"
|
196 |
+
]
|
197 |
+
},
|
198 |
+
{
|
199 |
+
"cell_type": "markdown",
|
200 |
+
"metadata": {},
|
201 |
+
"source": [
|
202 |
+
"## Important Notes\\n",
|
203 |
+
"\\n",
|
204 |
+
"⚠️ **Medical Disclaimer**: This model is intended for research and educational purposes only; its output is not a diagnosis. Always consult qualified healthcare professionals for medical decisions.\\n",
|
205 |
+
"\\n",
|
206 |
+
"📚 **Citation**: If you use this model in research, please cite:\\n",
|
207 |
+
"```\\n",
|
208 |
+
"Mikhael, P.G., Wohlwend, J., Yala, A. et al. (2023).\\n",
|
209 |
+
"Sybil: A validated deep learning model to predict future lung cancer risk\\n",
|
210 |
+
"from a single low-dose chest computed tomography.\\n",
|
211 |
+
"Journal of Clinical Oncology, 41(12), 2191-2200.\\n",
|
212 |
+
"```"
|
213 |
+
]
|
214 |
+
}
|
215 |
+
],
|
216 |
+
"metadata": {
|
217 |
+
"kernelspec": {
|
218 |
+
"display_name": "Python 3",
|
219 |
+
"language": "python",
|
220 |
+
"name": "python3"
|
221 |
+
},
|
222 |
+
"language_info": {
|
223 |
+
"codemirror_mode": {
|
224 |
+
"name": "ipython",
|
225 |
+
"version": 3
|
226 |
+
},
|
227 |
+
"file_extension": ".py",
|
228 |
+
"mimetype": "text/x-python",
|
229 |
+
"name": "python",
|
230 |
+
"nbconvert_exporter": "python",
|
231 |
+
"pygments_lexer": "ipython3",
|
232 |
+
"version": "3.8.0"
|
233 |
+
}
|
234 |
+
},
|
235 |
+
"nbformat": 4,
|
236 |
+
"nbformat_minor": 4
|
237 |
+
}
|