MoritzMMuller committed on
Commit
052bdca
·
verified ·
1 Parent(s): e7cb354

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +336 -38
src/streamlit_app.py CHANGED
@@ -1,40 +1,338 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
4
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- """
7
- # Welcome to Streamlit!
8
-
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
-
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
15
-
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
1
# NOTE: a leftover Jupyter cell magic ("%%writefile saved_models/app.py") used to
# be the first line here. It is not valid Python inside a .py module (Streamlit
# would fail with a SyntaxError), so it is preserved only as this comment.
import os
from datetime import datetime

import cv2
import folium
import numpy as np
import pandas as pd
import requests
import streamlit as st
from geopy.exc import GeopyError
from geopy.geocoders import Nominatim
from PIL import Image
from streamlit_folium import st_folium
from transformers import (
    AutoFeatureExtractor,
    AutoModelForImageClassification,
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    Qwen2VLForConditionalGeneration,
    Qwen2VLProcessor,
    pipeline,
)
21
+
22
+
23
st.set_page_config(page_title="Skin Cancer Dashboard", layout="wide")

# --- Configuration ---
# The Hugging Face token is read from the environment so that no secret has to
# be committed with the source:
#     export HF_TOKEN="YOUR_TOKEN_HERE"
MODEL_NAME = "Anwarkh1/Skin_Cancer-Image_Classification"
LLM_NAME = "google/flan-t5-xl"
# Empty string when the variable is unset; the model loaders are then skipped
# and the scan page reports "Missing HF_TOKEN." instead of sending a bogus token.
HF_TOKEN = os.environ.get("HF_TOKEN", "").strip()
DATA_DIR = "data/harvard_dataset"  # Path where you download and unpack the Harvard Dataverse dataset
DIARY_CSV = "diary.csv"
# NOTE(review): the two local model directories below are not referenced anywhere
# in the visible code, and their names look swapped relative to their values
# (CANCER_DIR points at a flan-t5 folder, LLM_DIR at a SkinCancer folder) -
# confirm before using them.
CANCER_DIR = r"D:\Models\googleflan-t5-xl"
LLM_DIR = r"D:\Models\SkinCancer"
35
+
36
# Seed the per-session defaults exactly once; the 'initialized' flag keeps
# later reruns of the script from wiping the stored prediction and chat.
if 'initialized' not in st.session_state:
    st.session_state.update({
        'label': None,
        'score': None,
        'mole_id': '',
        'geo_location': '',
        'chat_history': [],
        'initialized': True,
    })

# Nominatim geocoder used to turn the user's location text into coordinates
# (requests time out after 10 seconds).
geolocator = Nominatim(timeout=10, user_agent="skin-dashboard")
47
+
48
+ # --- Load Model & Feature Extractor ---
49
@st.cache_resource
def load_image_model(token: str):
    """Build the image-classification pipeline for ``MODEL_NAME``.

    The result is cached by ``st.cache_resource``, so the weights are loaded
    once per process rather than on every script rerun.

    Args:
        token: Hugging Face access token forwarded to ``from_pretrained``.

    Returns:
        A ``transformers`` ``image-classification`` pipeline.
    """
    # Local import: torch is only needed here, to probe for a CUDA device.
    import torch

    # ``token=`` replaces the deprecated ``use_auth_token=`` keyword.
    extractor = AutoFeatureExtractor.from_pretrained(MODEL_NAME, token=token)
    model = AutoModelForImageClassification.from_pretrained(MODEL_NAME, token=token)

    # Use the first GPU when one exists, otherwise fall back to CPU (-1);
    # a hard-coded device=0 fails on CPU-only hosts.
    device = 0 if torch.cuda.is_available() else -1
    return pipeline(
        "image-classification",
        model=model,
        feature_extractor=extractor,
        device=device,
    )
65
+
66
@st.cache_resource
def load_llm(token: str):
    """Build the text2text-generation pipeline for ``LLM_NAME``.

    The result is cached by ``st.cache_resource``, so the model is loaded once
    per process rather than on every script rerun.

    Args:
        token: Hugging Face access token forwarded to ``from_pretrained``.

    Returns:
        A ``transformers`` ``text2text-generation`` pipeline configured for
        beam search (5 beams, no repeated bigrams, early stopping).
    """
    # ``token=`` replaces the deprecated ``use_auth_token=`` keyword.
    tokenizer = AutoTokenizer.from_pretrained(LLM_NAME, token=token)
    # Use Seq2SeqLM for T5-style (text2text) models:
    model = AutoModelForSeq2SeqLM.from_pretrained(LLM_NAME, token=token)
    return pipeline(
        "text2text-generation",
        model=model,
        tokenizer=tokenizer,
        # NOTE(review): the model is already instantiated above, so it is
        # unclear whether device_map still affects placement here - confirm,
        # or pass device_map to from_pretrained / use device=0 or -1 instead.
        device_map="auto",
        max_length=10000,
        num_beams=5,
        no_repeat_ngram_size=2,
        early_stopping=True,
    )
89
# Both pipelines need a token; without one they stay None and the pages that
# depend on them check for that.
if HF_TOKEN:
    classifier = load_image_model(HF_TOKEN)
    explainer = load_llm(HF_TOKEN)
else:
    classifier = None
    explainer = None

# --- Diary Init ----

# Create an empty diary file (header row only) the first time the app runs.
if not os.path.exists(DIARY_CSV):
    _empty_diary = pd.DataFrame(
        columns=[
            "timestamp", "image_path", "mole_id", "geo_location", "label",
            "score", "body_location", "prior_consultation", "pain", "itch",
        ]
    )
    _empty_diary.to_csv(DIARY_CSV, index=False)
99
+
100
+ # --- Save entry helper
101
+
102
def save_entry(img_path: str, mole_id: str, geo_location: str,
               label: str, score: float,
               body_location: str, prior_consult: str, pain: str, itch: str):
    """Append one scan record to the diary CSV.

    The whole diary is read from ``DIARY_CSV``, the new row is added at the
    end with the current local time as its ISO-formatted timestamp, and the
    file is written back without the index column.

    Args:
        img_path: Path of the saved scan image.
        mole_id: Identifier the user gave to the mole.
        geo_location: Location text entered by the user.
        label: Predicted class label.
        score: Prediction score; stored as a ``float``.
        body_location: Body part selected by the user.
        prior_consult: Answer to the prior-consultation question.
        pain: Answer to the pain question.
        itch: Answer to the itch question.
    """
    diary = pd.read_csv(DIARY_CSV)
    record = dict(
        timestamp=datetime.now().isoformat(),
        image_path=img_path,
        mole_id=mole_id,
        geo_location=geo_location,
        label=label,
        score=float(score),
        body_location=body_location,
        prior_consultation=prior_consult,
        pain=pain,
        itch=itch,
    )
    # Assigning at position len(diary) enlarges the frame by one row.
    diary.loc[len(diary)] = record
    diary.to_csv(DIARY_CSV, index=False)
120
+
121
+ # --- Preprocessing Functions ---
122
def remove_hair(img: np.ndarray) -> np.ndarray:
    """Inpaint thin dark structures (intended: hairs) out of a colour image.

    The image is converted to grayscale with ``cv2.COLOR_RGB2GRAY``, i.e. it is
    interpreted as RGB. A black-hat filter with a 17x17 rectangular kernel
    responds to dark details smaller than the kernel; responses above 10 form
    the mask that is filled in with Telea inpainting (radius 1).

    Args:
        img: 3-channel image array.

    Returns:
        A copy of ``img`` with the masked pixels inpainted.
    """
    structuring = cv2.getStructuringElement(cv2.MORPH_RECT, (17, 17))
    grayscale = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    dark_detail = cv2.morphologyEx(grayscale, cv2.MORPH_BLACKHAT, structuring)
    # threshold() returns (retval, image); only the binary mask is needed.
    hair_mask = cv2.threshold(dark_detail, 10, 255, cv2.THRESH_BINARY)[1]
    return cv2.inpaint(img, hair_mask, 1, cv2.INPAINT_TELEA)
128
+
129
+
130
def preprocess(img: Image.Image, size: int = 224) -> Image.Image:
    """Clean up a skin photo and letterbox it onto a square canvas.

    Steps: hair removal, bilateral smoothing, CLAHE contrast equalisation of
    the L channel in LAB space, aspect-preserving resize so the longer side
    equals ``size``, then centring on a mid-gray (128) ``size`` x ``size``
    canvas.

    Args:
        img: Input picture; converted to RGB first, so any PIL mode works.
        size: Side length in pixels of the square output.

    Returns:
        The processed picture as an RGB ``PIL.Image`` of ``size`` x ``size``.
    """
    rgb = np.array(img.convert("RGB"))
    # remove_hair() converts with COLOR_RGB2GRAY, so it must see the RGB array;
    # the switch to OpenCV's BGR order happens only afterwards.
    rgb = remove_hair(rgb)
    bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
    bgr = cv2.bilateralFilter(bgr, d=9, sigmaColor=75, sigmaSpace=75)

    # Equalise contrast on lightness only so colours are not shifted.
    lab = cv2.cvtColor(bgr, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    merged = cv2.merge((clahe.apply(l), a, b))
    bgr = cv2.cvtColor(merged, cv2.COLOR_LAB2BGR)

    h, w = bgr.shape[:2]
    scale = size / max(h, w)
    # Clamp to [1, size]: truncation could otherwise give a 0-pixel side for
    # very elongated images, which cv2.resize rejects.
    nh = min(size, max(1, round(h * scale)))
    nw = min(size, max(1, round(w * scale)))
    resized = cv2.resize(bgr, (nw, nh), interpolation=cv2.INTER_AREA)

    canvas = np.full((size, size, 3), 128, dtype=np.uint8)
    top, left = (size - nh) // 2, (size - nw) // 2
    canvas[top:top + nh, left:left + nw] = resized
    return Image.fromarray(cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB))
150
+
151
+ # -----Streamlit layout ----
152
@st.cache_data(show_spinner=False)
def _geocode_place(place: str):
    """Return ``(latitude, longitude)`` for *place*, or ``None`` if nothing is found.

    Cached so that script reruns do not query the geocoder again for the same
    text. Geocoder errors propagate to the caller (and are not cached).
    """
    hit = geolocator.geocode(place)
    return (hit.latitude, hit.longitude) if hit else None


@st.cache_data(show_spinner=False)
def _fetch_nearby_clinics(lat: float, lon: float):
    """Return ``(lat, lon, name)`` tuples for clinics/doctors within 5000 m.

    Queries the Overpass API; HTTP and JSON errors propagate to the caller
    (and are not cached). Elements without coordinates are skipped.
    """
    query = (
        f'[out:json];node(around:5000,{lat},{lon})'
        '[~"^(amenity|healthcare)$"~"clinic|doctors"];out;'
    )
    resp = requests.post(
        "https://overpass-api.de/api/interpreter",
        data={"data": query},
        timeout=30,  # never hang the page on a slow Overpass server
    )
    resp.raise_for_status()
    clinics = []
    for el in resp.json().get('elements', []):
        center = el.get('center', {})
        # Explicit None checks: a coordinate of 0.0 is valid but falsy.
        c_lat = el.get('lat', center.get('lat'))
        c_lon = el.get('lon', center.get('lon'))
        if c_lat is None or c_lon is None:
            continue
        clinics.append((c_lat, c_lon, el.get('tags', {}).get('name', 'Clinic')))
    return clinics


st.title("🩺 Skin Cancer Recognition Dashboard")
menu = ["Scan Mole", "Chat", "Diary", "Dataset Explorer"]
choice = st.sidebar.selectbox("Navigation", menu)

# --- Initialize Scan a Mole ---
if choice == "Scan Mole":
    st.header("🔍 Scan a Mole")
    if not classifier:
        st.error("Missing HF_TOKEN.")
        st.stop()

    upload = st.file_uploader("Upload a skin image", type=["jpg", "jpeg", "png"])
    if not upload:
        st.info("Please upload an image to begin.")
        st.stop()

    raw = Image.open(upload).convert("RGB")
    st.image(raw, caption="Original", use_container_width=True)

    proc = preprocess(raw)
    st.image(proc, caption="Preprocessed", use_container_width=True)

    mole = st.text_input("Mole ID")
    city = st.text_input("Geographic location")
    body = st.selectbox(
        "Body location",
        ["Face", "Scalp", "Neck", "Chest", "Back", "Arm", "Hand", "Leg", "Foot", "Other"],
    )
    prior = st.radio("Prior consult?", ["Yes", "No"], horizontal=True)
    pain = st.radio("Pain?", ["Yes", "No"], horizontal=True)
    itch = st.radio("Itch?", ["Yes", "No"], horizontal=True)

    if st.button("Classify"):
        if not mole or not city:
            st.error("Enter ID and location.")
        else:
            with st.spinner("Analyzing..."):
                out = classifier(proc)
                lbl, scr = out[0]["label"], out[0]["score"]
                # The mole ID is free text: keep only filename-safe characters
                # so it cannot inject path separators or "..".
                safe_id = "".join(
                    ch if ch.isalnum() or ch in "-_" else "_" for ch in mole
                )
                save_path = os.path.join(
                    "scans", f"{safe_id}_{datetime.now().timestamp()}.png"
                )
                os.makedirs(os.path.dirname(save_path), exist_ok=True)
                raw.save(save_path)
                # The diary keeps the ID exactly as the user typed it.
                save_entry(save_path, mole, city, lbl, scr, body, prior, pain, itch)
            st.session_state.update({
                'label': lbl,
                'score': scr,
                'mole_id': mole,
                'geo_location': city,
            })

    if st.session_state['label']:
        label = st.session_state['label']
        st.success(f"Prediction: {label} (score {st.session_state['score']:.2f})")

        if explainer:
            # Every widget or map interaction reruns the script; regenerate the
            # explanation only when the predicted label has changed.
            if st.session_state.get('explained_label') != label:
                with st.spinner("Explaining..."):
                    st.session_state['explanation'] = explainer(
                        f"Explain {label} and recommendation."
                    )[0]['generated_text']
                st.session_state['explained_label'] = label
            st.markdown("### Explanation")
            st.write(st.session_state['explanation'])

        try:
            coords = _geocode_place(st.session_state['geo_location'])
        except GeopyError as exc:
            coords = None
            st.warning(f"Could not look up the location: {exc}")

        if coords:
            lat, lon = coords
            m = folium.Map([lat, lon], zoom_start=12)
            folium.Marker([lat, lon], "You").add_to(m)
            try:
                clinics = _fetch_nearby_clinics(lat, lon)
            except (requests.RequestException, ValueError) as exc:
                # Still show the map with the user's marker if Overpass fails.
                clinics = []
                st.warning(f"Could not load nearby clinics: {exc}")
            for c_lat, c_lon, name in clinics:
                folium.Marker([c_lat, c_lon], name).add_to(m)
            st.markdown("### Nearby Clinics")
            st_folium(m, width=700)
219
+
220
+ # --- Chat Tab ---
221
+ elif choice == "Chat":
222
+ st.header("💬 Follow-Up Chat")
223
+ if not st.session_state['label']:
224
+ st.info("Please perform a scan first in the 'Scan Mole' tab.")
225
+ else:
226
+ lbl = st.session_state['label']
227
+ scr = st.session_state['score']
228
+ mid = st.session_state['mole_id']
229
+ gloc = st.session_state['geo_location']
230
+ st.markdown(f"**Context:** prediction for **{mid}** at **{gloc}** is **{lbl}** (confidence {scr:.2f}).")
231
+
232
+ # New user message comes first for immediate loop
233
+ user_q = st.chat_input("Ask a follow-up question:", key="chat_input")
234
+ if user_q and explainer:
235
+ st.session_state['chat_history'].append({'role':'user','content':user_q})
236
+ system_p = "You are a dermatology assistant. Provide concise medical advice without clarifying questions."
237
+ tpl = (
238
+ f"{system_p}\nContext: prediction is {lbl} with confidence {scr:.2f}.\n"
239
+ f"User: {user_q}\nAssistant:"
240
+ )
241
+ with st.spinner("Generating response..."):
242
+ reply = explainer(tpl)[0]['generated_text']
243
+ st.session_state['chat_history'].append({'role':'assistant','content':reply})
244
+
245
+ # Display the updated chat history
246
+ for msg in st.session_state['chat_history']:
247
+ prefix = 'You' if msg['role']=='user' else 'AI'
248
+ st.markdown(f"**{prefix}:** {msg['content']}")
249
+
250
+
251
+ # --- Diary Page ---
252
+ elif choice == "Diary":
253
+ st.header("📖 Skin Cancer Diary")
254
+ df = pd.read_csv(DIARY_CSV)
255
+ df['timestamp'] = pd.to_datetime(df['timestamp'])
256
+ if df.empty:
257
+ st.info("No diary entries yet.")
258
+ else:
259
+ mole_ids = sorted(df['mole_id'].unique())
260
+ sel = st.selectbox("Select Mole to View", ['All'] + mole_ids, key="diary_sel")
261
+ if sel == 'All':
262
+ # Display moles in columns (max 3 per row)
263
+ chunks = [mole_ids[i:i+3] for i in range(0, len(mole_ids), 3)]
264
+ for group in chunks:
265
+ cols = st.columns(len(group))
266
+ for col, mid in zip(cols, group):
267
+ with col:
268
+ st.subheader(mid)
269
+ entries = df[df['mole_id'] == mid].sort_values('timestamp')
270
+ # Show image timeline
271
+ for _, row in entries.iterrows():
272
+ if os.path.exists(row['image_path']):
273
+ st.image(
274
+ row['image_path'],
275
+ width=150,
276
+ caption=f"{row['timestamp'].strftime('%Y-%m-%d')} — {row['score']:.2f}"
277
+ )
278
+ st.write(f"Total scans: {len(entries)}")
279
+ else:
280
+ # Detailed view for a single mole
281
+ entries = df[df['mole_id'] == sel].sort_values('timestamp')
282
+ if entries.empty:
283
+ st.warning(f"No entries for {sel}.")
284
+ else:
285
+ # Score over time
286
+ st.line_chart(entries.set_index('timestamp')['score'])
287
+ st.markdown("#### Image Timeline")
288
+ for _, row in entries.iterrows():
289
+ if os.path.exists(row['image_path']):
290
+ st.image(
291
+ row['image_path'],
292
+ width=200,
293
+ caption=(
294
+ f"{row['timestamp'].strftime('%Y-%m-%d %H:%M')} — "
295
+ f"Score: {row['score']:.2f}"
296
+ )
297
+ )
298
+ st.markdown("#### Details")
299
+ st.dataframe(
300
+ entries[
301
+ ['timestamp','geo_location','label','score',
302
+ 'body_location','prior_consultation','pain','itch']
303
+ ]
304
+ .rename(columns={
305
+ 'timestamp':'Time','geo_location':'Location',
306
+ 'label':'Diagnosis','score':'Confidence',
307
+ 'body_location':'Body Part','prior_consultation':'Prior Consult',
308
+ 'pain':'Pain','itch':'Itch'
309
+ })
310
+ .sort_values('Time', ascending=False)
311
+ )
312
+
313
+ else:
314
+ st.header("📂 Dataset Explorer")
315
+ st.write("Preview images from the Harvard Skin Cancer Dataset")
316
+
317
+ # pick up to 15 image files
318
+ image_files = [
319
+ f for f in os.listdir(DATA_DIR)
320
+ if os.path.isfile(os.path.join(DATA_DIR, f))
321
+ and f.lower().endswith((".jpg", ".jpeg", ".png"))
322
+ ][:15]
323
+
324
+ for i in range(0, len(image_files), 3):
325
+ cols = st.columns(3)
326
+ for col, fn in zip(cols, image_files[i : i + 3]):
327
+ path = os.path.join(DATA_DIR, fn)
328
+ img = Image.open(path)
329
+ col.image(img, use_container_width=True)
330
+ col.caption(fn)
331
+
332
# Sidebar footer: a divider followed by dataset and model credits.
st.sidebar.markdown("---")
for _footer_line in (
    "Dataset powered by Harvard Dataverse [DBW86T]",
    f"Model: {MODEL_NAME}",
    f"LLM: {LLM_NAME}",
):
    st.sidebar.write(_footer_line)

# st.write() is called with no arguments when the file is run as a script.
if __name__ == '__main__':
    st.write()