MoritzMMuller committed on
Commit
fbdc368
·
verified ·
1 Parent(s): 128f6e3

Delete src

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +0 -334
src/streamlit_app.py DELETED
@@ -1,334 +0,0 @@
1
- import os
2
- import streamlit as st
3
- from PIL import Image
4
- import pandas as pd
5
- from datetime import datetime
6
- from transformers import (
7
- AutoFeatureExtractor,
8
- AutoModelForImageClassification,
9
- AutoTokenizer,
10
- AutoModelForSeq2SeqLM,
11
- pipeline )
12
- import requests
13
- from geopy.geocoders import Nominatim
14
- import folium
15
- from streamlit_folium import st_folium
16
- import cv2
17
- import numpy as np
18
-
19
-
20
# Give the page its title and switch Streamlit to the wide layout.
st.set_page_config(
    layout="wide",
    page_title="Skin Cancer Dashboard",
)
21
-
22
# --- Configuration ---
# The Hugging Face access token is read from the environment so that no
# credential (or placeholder) lives in the source file:
#   export HF_TOKEN="YOUR_TOKEN_HERE"
MODEL_NAME = "Anwarkh1/Skin_Cancer-Image_Classification"
LLM_NAME = "google/flan-t5-xl"
# Empty string when the variable is unset; the model loaders below are only
# invoked for a truthy token.
HF_TOKEN = os.environ.get("HF_TOKEN", "").strip()
DATA_DIR = "data/harvard_dataset"  # Path where you download and unpack the Harvard Dataverse dataset
DIARY_CSV = "diary.csv"
30
-
31
-
32
# Seed the per-session defaults exactly once; the 'initialized' flag marks
# a session that has already been set up.
if 'initialized' not in st.session_state:
    st.session_state.update({
        'label': None,
        'score': None,
        'mole_id': '',
        'geo_location': '',
        'chat_history': [],
        'initialized': True,
    })

# Nominatim geocoder used to turn the typed location into coordinates.
geolocator = Nominatim(timeout=10, user_agent="skin-dashboard")
43
-
44
# --- Load Model & Feature Extractor ---
@st.cache_resource
def load_image_model(token: str, device: "int | None" = None):
    """Build the cached image-classification pipeline for ``MODEL_NAME``.

    Args:
        token: Hugging Face access token forwarded to ``from_pretrained``.
        device: GPU index, or ``-1`` for CPU. When ``None`` (the default)
            the first CUDA device is used if one is available, otherwise
            the CPU, so the app also starts on hosts without a GPU.

    Returns:
        A ``transformers`` "image-classification" pipeline.
    """
    if device is None:
        # Local import: torch is only needed for this availability probe.
        import torch

        device = 0 if torch.cuda.is_available() else -1

    # NOTE(review): `use_auth_token` is kept as in the original call; newer
    # transformers releases prefer `token=` - confirm against the pinned version.
    extractor = AutoFeatureExtractor.from_pretrained(
        MODEL_NAME,
        use_auth_token=token,
    )
    model = AutoModelForImageClassification.from_pretrained(
        MODEL_NAME,
        use_auth_token=token,
    )
    return pipeline(
        "image-classification",
        model=model,
        feature_extractor=extractor,
        device=device,
    )
61
-
62
@st.cache_resource
def load_llm(token: str):
    """Build the cached text2text-generation pipeline for ``LLM_NAME``.

    Args:
        token: Hugging Face access token forwarded to ``from_pretrained``.

    Returns:
        A ``transformers`` "text2text-generation" pipeline configured with
        beam search (5 beams), no repeated bigrams and early stopping.
    """
    # Local import: only needed to probe for the optional `accelerate` package.
    import importlib.util

    # `device_map` only takes effect while the weights are being loaded, so it
    # is passed to `from_pretrained` rather than to `pipeline()`, which is
    # handed an already-instantiated model. It needs `accelerate`; without
    # that package the model is loaded with default placement.
    placement = {}
    if importlib.util.find_spec("accelerate") is not None:
        placement["device_map"] = "auto"

    tokenizer = AutoTokenizer.from_pretrained(
        LLM_NAME,
        use_auth_token=token,
    )
    # Use Seq2SeqLM for T5-style (text2text) models:
    model = AutoModelForSeq2SeqLM.from_pretrained(
        LLM_NAME,
        use_auth_token=token,
        **placement,
    )
    return pipeline(
        "text2text-generation",
        model=model,
        tokenizer=tokenizer,
        max_length=10000,
        num_beams=5,
        no_repeat_ngram_size=2,
        early_stopping=True,
    )
85
# Load both models only when a token is configured; otherwise leave them
# as None so the pages can detect the missing setup.
classifier = explainer = None
if HF_TOKEN:
    classifier = load_image_model(HF_TOKEN)
    explainer = load_llm(HF_TOKEN)

# --- Diary Init ----
# Create an empty diary file (header row only) on first run.
if not os.path.exists(DIARY_CSV):
    diary_columns = [
        "timestamp", "image_path", "mole_id", "geo_location", "label", "score",
        "body_location", "prior_consultation", "pain", "itch",
    ]
    empty_diary = pd.DataFrame(columns=diary_columns)
    empty_diary.to_csv(DIARY_CSV, index=False)
95
-
96
# --- Save entry helper ---
def save_entry(img_path: str, mole_id: str, geo_location: str,
               label: str, score: float,
               body_location: str, prior_consult: str, pain: str, itch: str) -> None:
    """Append one scan record to the diary CSV.

    The row is appended instead of re-reading and rewriting the whole file:
    a read/write round trip re-infers column types, which would turn an ID
    such as "007" into 7 for every previously stored row.

    Args:
        img_path: Path of the saved scan image.
        mole_id: User-supplied identifier of the mole.
        geo_location: User-supplied geographic location.
        label: Predicted class label.
        score: Prediction score; stored as a float.
        body_location: Selected body location.
        prior_consult: Answer to the prior-consultation question.
        pain: Answer to the pain question.
        itch: Answer to the itch question.
    """
    # Key order matches the column order of the diary header.
    entry = {
        "timestamp": datetime.now().isoformat(),
        "image_path": img_path,
        "mole_id": mole_id,
        "geo_location": geo_location,
        "label": label,
        "score": float(score),
        "body_location": body_location,
        "prior_consultation": prior_consult,
        "pain": pain,
        "itch": itch,
    }
    # Emit the header only when starting a new (or empty) file.
    needs_header = (
        not os.path.exists(DIARY_CSV) or os.path.getsize(DIARY_CSV) == 0
    )
    pd.DataFrame([entry]).to_csv(
        DIARY_CSV, mode="a", header=needs_header, index=False
    )
116
-
117
# --- Preprocessing Functions ---
def remove_hair(img: np.ndarray) -> np.ndarray:
    """Inpaint the pixels selected by a black-hat mask of the image.

    The image is converted to grayscale with ``COLOR_RGB2GRAY`` (so it is
    treated as RGB), a black-hat transform with a 17x17 rectangular kernel
    is thresholded at 10 to form a binary mask, and the masked pixels are
    filled in with Telea inpainting (radius 1).

    Args:
        img: 3-channel image array.

    Returns:
        The inpainted image array.
    """
    structuring = cv2.getStructuringElement(cv2.MORPH_RECT, (17, 17))
    grayscale = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    response = cv2.morphologyEx(grayscale, cv2.MORPH_BLACKHAT, structuring)
    hair_mask = cv2.threshold(response, 10, 255, cv2.THRESH_BINARY)[1]
    return cv2.inpaint(img, hair_mask, 1, cv2.INPAINT_TELEA)
124
-
125
-
126
def preprocess(img: Image.Image, size: int = 224) -> Image.Image:
    """Clean up a skin photo and letterbox it onto a square canvas.

    Steps: hair removal, bilateral filtering, CLAHE on the L channel in LAB
    space, aspect-preserving resize so the longer side equals ``size``, and
    centring on a ``size`` x ``size`` canvas filled with value 128.

    Args:
        img: Input image; converted to RGB first, so any PIL mode is accepted.
        size: Side length of the square output in pixels.

    Returns:
        The preprocessed RGB image of shape ``size`` x ``size``.
    """
    rgb = np.array(img.convert("RGB"))
    # remove_hair() converts with COLOR_RGB2GRAY, so it must be given the
    # array while it is still in RGB order, before the switch to BGR.
    rgb = remove_hair(rgb)
    bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
    bgr = cv2.bilateralFilter(bgr, d=9, sigmaColor=75, sigmaSpace=75)

    # Equalise only the lightness channel.
    lab = cv2.cvtColor(bgr, cv2.COLOR_BGR2LAB)
    l_chan, a_chan, b_chan = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    merged = cv2.merge((clahe.apply(l_chan), a_chan, b_chan))
    bgr = cv2.cvtColor(merged, cv2.COLOR_LAB2BGR)

    h, w = bgr.shape[:2]
    scale = size / max(h, w)
    # round() avoids losing a pixel to float truncation; the clamp keeps very
    # elongated images from collapsing to a 0-pixel side (cv2.resize rejects
    # that) and keeps the result inside the canvas.
    nh = min(size, max(1, round(h * scale)))
    nw = min(size, max(1, round(w * scale)))
    resized = cv2.resize(bgr, (nw, nh), interpolation=cv2.INTER_AREA)

    canvas = np.full((size, size, 3), 128, dtype=np.uint8)
    top, left = (size - nh) // 2, (size - nw) // 2
    canvas[top:top + nh, left:left + nw] = resized
    return Image.fromarray(cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB))
146
-
147
# ----- Streamlit layout -----
st.title("🩺 Skin Cancer Recognition Dashboard")
menu = ["Scan Mole", "Chat", "Diary", "Dataset Explorer"]
choice = st.sidebar.selectbox("Navigation", menu)

# --- Scan a Mole page ---
# Upload an image, classify it, store the result in the diary, then show an
# explanation and a map of nearby clinics for the most recent prediction.
if choice == "Scan Mole":
    # Local import: only this page handles geocoding failures.
    from geopy.exc import GeopyError

    st.header("🔍 Scan a Mole")
    if not classifier:
        st.error("Missing HF_TOKEN.")
        st.stop()

    upload = st.file_uploader("Upload a skin image", type=["jpg", "jpeg", "png"])
    if not upload:
        st.info("Please upload an image to begin.")
        st.stop()

    raw = Image.open(upload).convert("RGB")
    st.image(raw, caption="Original", use_container_width=True)

    proc = preprocess(raw)
    st.image(proc, caption="Preprocessed", use_container_width=True)

    mole = st.text_input("Mole ID")
    city = st.text_input("Geographic location")
    body = st.selectbox(
        "Body location",
        ["Face", "Scalp", "Neck", "Chest", "Back", "Arm", "Hand", "Leg", "Foot", "Other"],
    )
    prior = st.radio("Prior consult?", ["Yes", "No"], horizontal=True)
    pain = st.radio("Pain?", ["Yes", "No"], horizontal=True)
    itch = st.radio("Itch?", ["Yes", "No"], horizontal=True)

    if st.button("Classify"):
        if not mole or not city:
            st.error("Enter ID and location.")
        else:
            with st.spinner("Analyzing..."):
                out = classifier(proc)
                lbl, scr = out[0]["label"], out[0]["score"]
                # The mole ID is free text from the user: keep only characters
                # that are safe in a file name so it cannot escape "scans/".
                safe_id = "".join(
                    ch if ch.isalnum() or ch in "-_" else "_" for ch in mole
                )
                save_path = os.path.join(
                    "scans", f"{safe_id}_{datetime.now().timestamp()}.png"
                )
                os.makedirs(os.path.dirname(save_path), exist_ok=True)
                raw.save(save_path)
                # The diary keeps the ID exactly as the user typed it.
                save_entry(save_path, mole, city, lbl, scr, body, prior, pain, itch)
                st.session_state.update({
                    'label': lbl,
                    'score': scr,
                    'mole_id': mole,
                    'geo_location': city
                })

    if st.session_state['label']:
        st.success(f"Prediction: {st.session_state['label']} (score {st.session_state['score']:.2f})")
        if explainer:
            with st.spinner("Explaining..."):
                text = explainer(f"Explain {st.session_state['label']} and recommendation.")[0]['generated_text']
            st.markdown("### Explanation")
            st.write(text)

        # Geocoding and the clinic lookup are network calls; a failure should
        # degrade the page, not crash it.
        try:
            loc = geolocator.geocode(st.session_state['geo_location'])
        except GeopyError as err:
            st.warning(f"Could not look up the location: {err}")
            loc = None

        if loc:
            m = folium.Map([loc.latitude, loc.longitude], zoom_start=12)
            folium.Marker([loc.latitude, loc.longitude], "You").add_to(m)
            try:
                resp = requests.post(
                    "https://overpass-api.de/api/interpreter",
                    data={"data": f"[out:json];node(around:5000,{loc.latitude},{loc.longitude})[~\"^(amenity|healthcare)$\"~\"clinic|doctors\"];out;"},
                    timeout=30,
                )
                resp.raise_for_status()
                elements = resp.json().get('elements', [])
            except (requests.RequestException, ValueError) as err:
                st.warning(f"Could not load nearby clinics: {err}")
                elements = []

            for el in elements:
                tags = el.get('tags', {})
                center = el.get('center', {})
                lat = el.get('lat')
                lon = el.get('lon')
                # Fall back to the element's centre when it has no own coordinates.
                if lat is None:
                    lat = center.get('lat')
                if lon is None:
                    lon = center.get('lon')
                if lat is None or lon is None:
                    continue
                folium.Marker([lat, lon], tags.get('name', 'Clinic')).add_to(m)
            st.markdown("### Nearby Clinics")
            st_folium(m, width=700)
215
-
216
- # --- Chat Tab ---
217
- elif choice == "Chat":
218
- st.header("💬 Follow-Up Chat")
219
- if not st.session_state['label']:
220
- st.info("Please perform a scan first in the 'Scan Mole' tab.")
221
- else:
222
- lbl = st.session_state['label']
223
- scr = st.session_state['score']
224
- mid = st.session_state['mole_id']
225
- gloc = st.session_state['geo_location']
226
- st.markdown(f"**Context:** prediction for **{mid}** at **{gloc}** is **{lbl}** (confidence {scr:.2f}).")
227
-
228
- # New user message comes first for immediate loop
229
- user_q = st.chat_input("Ask a follow-up question:", key="chat_input")
230
- if user_q and explainer:
231
- st.session_state['chat_history'].append({'role':'user','content':user_q})
232
- system_p = "You are a dermatology assistant. Provide concise medical advice without clarifying questions."
233
- tpl = (
234
- f"{system_p}\nContext: prediction is {lbl} with confidence {scr:.2f}.\n"
235
- f"User: {user_q}\nAssistant:"
236
- )
237
- with st.spinner("Generating response..."):
238
- reply = explainer(tpl)[0]['generated_text']
239
- st.session_state['chat_history'].append({'role':'assistant','content':reply})
240
-
241
- # Display the updated chat history
242
- for msg in st.session_state['chat_history']:
243
- prefix = 'You' if msg['role']=='user' else 'AI'
244
- st.markdown(f"**{prefix}:** {msg['content']}")
245
-
246
-
247
- # --- Diary Page ---
248
- elif choice == "Diary":
249
- st.header("📖 Skin Cancer Diary")
250
- df = pd.read_csv(DIARY_CSV)
251
- df['timestamp'] = pd.to_datetime(df['timestamp'])
252
- if df.empty:
253
- st.info("No diary entries yet.")
254
- else:
255
- mole_ids = sorted(df['mole_id'].unique())
256
- sel = st.selectbox("Select Mole to View", ['All'] + mole_ids, key="diary_sel")
257
- if sel == 'All':
258
- # Display moles in columns (max 3 per row)
259
- chunks = [mole_ids[i:i+3] for i in range(0, len(mole_ids), 3)]
260
- for group in chunks:
261
- cols = st.columns(len(group))
262
- for col, mid in zip(cols, group):
263
- with col:
264
- st.subheader(mid)
265
- entries = df[df['mole_id'] == mid].sort_values('timestamp')
266
- # Show image timeline
267
- for _, row in entries.iterrows():
268
- if os.path.exists(row['image_path']):
269
- st.image(
270
- row['image_path'],
271
- width=150,
272
- caption=f"{row['timestamp'].strftime('%Y-%m-%d')} — {row['score']:.2f}"
273
- )
274
- st.write(f"Total scans: {len(entries)}")
275
- else:
276
- # Detailed view for a single mole
277
- entries = df[df['mole_id'] == sel].sort_values('timestamp')
278
- if entries.empty:
279
- st.warning(f"No entries for {sel}.")
280
- else:
281
- # Score over time
282
- st.line_chart(entries.set_index('timestamp')['score'])
283
- st.markdown("#### Image Timeline")
284
- for _, row in entries.iterrows():
285
- if os.path.exists(row['image_path']):
286
- st.image(
287
- row['image_path'],
288
- width=200,
289
- caption=(
290
- f"{row['timestamp'].strftime('%Y-%m-%d %H:%M')} — "
291
- f"Score: {row['score']:.2f}"
292
- )
293
- )
294
- st.markdown("#### Details")
295
- st.dataframe(
296
- entries[
297
- ['timestamp','geo_location','label','score',
298
- 'body_location','prior_consultation','pain','itch']
299
- ]
300
- .rename(columns={
301
- 'timestamp':'Time','geo_location':'Location',
302
- 'label':'Diagnosis','score':'Confidence',
303
- 'body_location':'Body Part','prior_consultation':'Prior Consult',
304
- 'pain':'Pain','itch':'Itch'
305
- })
306
- .sort_values('Time', ascending=False)
307
- )
308
-
309
- else:
310
- st.header("📂 Dataset Explorer")
311
- st.write("Preview images from the Harvard Skin Cancer Dataset")
312
-
313
- # pick up to 15 image files
314
- image_files = [
315
- f for f in os.listdir(DATA_DIR)
316
- if os.path.isfile(os.path.join(DATA_DIR, f))
317
- and f.lower().endswith((".jpg", ".jpeg", ".png"))
318
- ][:15]
319
-
320
- for i in range(0, len(image_files), 3):
321
- cols = st.columns(3)
322
- for col, fn in zip(cols, image_files[i : i + 3]):
323
- path = os.path.join(DATA_DIR, fn)
324
- img = Image.open(path)
325
- col.image(img, use_container_width=True)
326
- col.caption(fn)
327
-
328
# Sidebar footer: a divider followed by dataset and model attribution.
sidebar = st.sidebar
sidebar.markdown("---")
for footer_line in (
    "Dataset powered by Harvard Dataverse [DBW86T]",
    f"Model: {MODEL_NAME}",
    f"LLM: {LLM_NAME}",
):
    sidebar.write(footer_line)

# Kept from the original: an argument-less st.write() call when the file is
# executed as a script.
if __name__ == '__main__':
    st.write()