"""Streamlit demo: pick an IDC series, download it, and run the classification notebook."""

import os
import shutil
import subprocess
from pathlib import Path
from tempfile import TemporaryDirectory

import pandas as pd
import papermill as pm
import polars
import pyarrow as pa
import pyarrow.parquet as pq
import pydicom
import pydicom.datadict as dd
import streamlit as st
from idc_index import index

# Notebook that performs the inference; it is re-fetched on every run.
NOTEBOOK_NAME = "DICOMScanClassification_user_demo.ipynb"
NOTEBOOK_URL = (
    "https://raw.githubusercontent.com/deepakri201/DICOMScanClassification_pw41/main/"
    + NOTEBOOK_NAME
)
OUTPUT_NOTEBOOK = "output.ipynb"
INPUT_DIR = Path("input_data/")


def _select_and_filter(frame, column, label):
    """Show a select box over the unique values of *column* and narrow *frame*.

    Returns a ``(choice, rows)`` tuple where ``rows`` are the rows of *frame*
    whose *column* equals the value picked in the select box.
    """
    choice = st.selectbox(label, frame[column].unique())
    return choice, frame[frame[column] == choice]


def _download_series(idc_client, series_instance_uids, target_dir):
    """Download the given series into a freshly emptied *target_dir*.

    Returns ``None`` on success, otherwise a user-facing error message.
    """
    # Start from an empty directory so files from a previous run are not mixed in.
    if target_dir.exists():
        shutil.rmtree(target_dir)
    os.makedirs(target_dir, exist_ok=True)
    try:
        idc_client.download_from_selection(
            seriesInstanceUID=series_instance_uids, downloadDir=target_dir
        )
    except Exception as e:  # UI boundary: report the failure instead of crashing the app
        return f"Error downloading data: {str(e)}"
    return None


def _fetch_notebook():
    """Fetch a fresh copy of the demo notebook with ``wget``.

    Returns ``None`` on success, otherwise a user-facing error message.
    """
    # Remove any stale copy first; wget would otherwise save under a ".1" suffix.
    if os.path.exists(NOTEBOOK_NAME):
        os.remove(NOTEBOOK_NAME)
    try:
        # check=True turns a failed download into an exception rather than
        # letting papermill fail later on a missing file.
        subprocess.run(["wget", NOTEBOOK_URL], check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers the case where the wget executable is not installed.
        return f"Error downloading notebook: {e}"
    return None


def _process_series(idc_client, idc_index_df, series_uid):
    """Download the chosen series, run the notebook on it and offer the result."""
    selection = idc_index_df[idc_index_df["SeriesInstanceUID"] == series_uid]
    series_instance_uids = selection["SeriesInstanceUID"].tolist()
    if not series_instance_uids:
        st.warning("No series matches the current selection.")
        return

    download_error = _download_series(idc_client, series_instance_uids, INPUT_DIR)
    if download_error:
        st.error(download_error)
        return
    st.success("Data downloaded successfully.")

    notebook_error = _fetch_notebook()
    if notebook_error:
        st.error(notebook_error)
        return

    pm.execute_notebook(
        NOTEBOOK_NAME,
        OUTPUT_NOTEBOOK,
        parameters=dict(SeriesInstanceUID=series_instance_uids),
    )
    # Reported only once the notebook has actually finished executing.
    st.success("Processing complete.")

    with open(OUTPUT_NOTEBOOK, "rb") as f:
        st.download_button(
            label="Download the output notebook file",
            data=f,
            file_name="output.ipynb",
            mime="application/json",
        )


# Main Streamlit app code
st.title("DICOM Classification Demo")
st.write("Select IDC data to download, extract images and metadata, and perform inference using three pre-trained models")

# Fetch IDC index
client = index.IDCClient()
index_df = client.index

# Option to choose IDC data: each select box narrows the rows offered to the next.
st.subheader("Choose IDC Data to Process")
selected_collection_id, df_filtered_by_collection = _select_and_filter(
    index_df, "collection_id", "Select Collection ID"
)
selected_patient_id, df_filtered_by_patient = _select_and_filter(
    df_filtered_by_collection, "PatientID", "Select Patient ID"
)
selected_modality, df_filtered_by_modality = _select_and_filter(
    df_filtered_by_patient, "Modality", "Select Modality"
)
selected_study, df_filtered_by_study = _select_and_filter(
    df_filtered_by_modality, "StudyInstanceUID", "Select Study"
)
selected_series = st.selectbox(
    "Select Series", df_filtered_by_study["SeriesInstanceUID"].unique()
)

# Button to process IDC data
if st.button("Process IDC data"):
    _process_series(client, index_df, selected_series)