"""Gradio app that scans a directory and renders it as a Plotly treemap."""

import asyncio
import os
import pathlib
import re
from datetime import datetime

import gradio as gr
import pandas as pd
import plotly.graph_objects as go

# --- Configuration ---
# Update frequency: how many files to process before updating the UI
UPDATE_INTERVAL = 250

# Columns of the per-file table, in display order (keys of get_file_info()).
_FILE_COLUMNS = [
    'path', 'label', 'parent', 'size',
    'color_group', 'created', 'modified', 'keywords',
]

_TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'
_WORD_PATTERN = re.compile(r'[A-Za-z]+')

# Marker used in the 'parent' column for files that sit directly in the root.
_ROOT_MARKER = "."

# Plotly needs real colours, not folder names, so every top-level folder is
# mapped onto this palette (cycled when there are more folders than colours).
_PALETTE = [
    "#636EFA", "#EF553B", "#00CC96", "#AB63FA", "#FFA15A",
    "#19D3F3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52",
]
_ROOT_COLOR = "lightgray"

_FILE_HOVER = (
    "<b>%{label}</b><br>"
    "Size: %{customdata[0]:.2s}B<br>"
    "Modified: %{customdata[1]}<br>"
    "Created: %{customdata[2]}<br>"
    "Keywords: %{customdata[3]}<br>"
    "Path: %{id}"
    "<extra></extra>"
)
_DIR_HOVER = "<b>%{label}</b><br>Path: %{id}<extra></extra>"


def parse_filename_words(filename: str) -> str:
    """
    Extracts contiguous groups of letters from a filename, ignoring numbers
    and symbols.

    Example: "Aarons123File-482.md" -> "Aarons, File, md"

    Returns "N/A" when the name contains no ASCII letters at all.
    """
    words = _WORD_PATTERN.findall(filename)
    return ", ".join(words) if words else "N/A"


def _format_timestamp(timestamp):
    """Format a POSIX timestamp for display, or "N/A" if it is out of range."""
    try:
        return datetime.fromtimestamp(timestamp).strftime(_TIMESTAMP_FORMAT)
    except (OSError, OverflowError, ValueError):
        return "N/A"


def get_file_info(path, root_path):
    """
    Gathers required information for a single file.

    Args:
        path: Path of the file to inspect.
        root_path: Directory the scan started from; 'parent' and
            'color_group' are computed relative to it.

    Returns:
        A dict with the keys 'path', 'label', 'parent', 'size',
        'color_group', 'created', 'modified' and 'keywords', or None if the
        path is not a regular file, is empty, or cannot be stat'ed.
        'parent' is the file's directory relative to root_path ("." for
        files directly in the root).
    """
    try:
        if not os.path.isfile(path):
            return None
        stat = os.stat(path)
    except OSError:
        # Vanished or unreadable between listing and stat: skip it.
        return None

    size = stat.st_size
    # Skip empty files
    if size == 0:
        return None

    try:
        relative_dir = os.path.dirname(os.path.relpath(path, root_path))
    except ValueError:
        # relpath() refuses paths on different drives (Windows); treat the
        # file as if it sat directly in the root instead of crashing.
        relative_dir = ""

    # Determine the top-level directory for color grouping. Files directly
    # in the root are grouped under the root folder's own name.
    if relative_dir:
        top_level_dir = pathlib.PurePath(relative_dir).parts[0]
    else:
        top_level_dir = (
            os.path.basename(os.path.normpath(root_path)) or _ROOT_MARKER
        )

    label = os.path.basename(path)
    return {
        'path': path,
        'label': label,
        'parent': relative_dir or _ROOT_MARKER,
        'size': size,
        'color_group': top_level_dir,
        # NOTE: st_ctime is creation time on Windows but the last metadata
        # change on most Unix systems.
        'created': _format_timestamp(stat.st_ctime),
        'modified': _format_timestamp(stat.st_mtime),
        'keywords': parse_filename_words(label),
    }


def create_treemap_figure(df):
    """
    Generates the Plotly treemap figure from a DataFrame of file info.

    Args:
        df: DataFrame whose rows are get_file_info() dicts. An empty frame
            yields a placeholder figure.

    Returns:
        A plotly.graph_objects.Figure holding one squarified Treemap trace.

    The frame only lists files, but a treemap requires every value in
    ``parents`` to also appear in ``ids``. Directory nodes are therefore
    synthesised here: each file hangs off its containing directory (full
    path), and the chain of directories is followed upwards until the scan
    root is reached.
    """
    if df.empty:
        return go.Figure(go.Treemap(
            labels=["Your scan will appear here."],
            parents=[""],
            values=[1]
        ))

    # unique() keeps first-appearance order, so a folder keeps its colour
    # while the frame grows during a running scan.
    color_of = {
        group: _PALETTE[index % len(_PALETTE)]
        for index, group in enumerate(df['color_group'].unique())
    }

    file_dirs = df['path'].map(os.path.dirname)

    dir_parent = {}  # directory id -> parent directory id ("" for the root)
    dir_color = {}   # directory id -> marker colour
    unique_dirs = dict.fromkeys(zip(file_dirs, df['parent'], df['color_group']))
    for abs_dir, rel_dir, group in unique_dirs:
        # Climb towards the root, registering each directory once. The
        # relative path tells us when the scan root has been reached.
        while abs_dir not in dir_parent:
            parent_dir = os.path.dirname(abs_dir)
            if rel_dir in ("", _ROOT_MARKER) or parent_dir == abs_dir:
                dir_parent[abs_dir] = ""
                dir_color[abs_dir] = _ROOT_COLOR
                break
            dir_parent[abs_dir] = parent_dir
            dir_color[abs_dir] = color_of[group]
            abs_dir = parent_dir
            rel_dir = os.path.dirname(rel_dir)

    dir_ids = list(dir_parent)
    dir_count = len(dir_ids)

    ids = dir_ids + df['path'].tolist()
    labels = [os.path.basename(d) or d for d in dir_ids] + df['label'].tolist()
    parents = [dir_parent[d] for d in dir_ids] + file_dirs.tolist()
    # Directories carry no size of their own; with branchvalues="remainder"
    # their area is the sum of their children.
    values = [0] * dir_count + df['size'].tolist()
    colors = (
        [dir_color[d] for d in dir_ids]
        + df['color_group'].map(color_of).tolist()
    )
    customdata = (
        [[None, None, None, None]] * dir_count
        + df[['size', 'modified', 'created', 'keywords']].values.tolist()
    )
    hovertemplates = [_DIR_HOVER] * dir_count + [_FILE_HOVER] * len(df)

    fig = go.Figure(go.Treemap(
        ids=ids,
        labels=labels,
        parents=parents,
        values=values,
        branchvalues="remainder",
        marker=dict(colors=colors),        # Color by top-level folder
        tiling=dict(packing="squarify"),   # Use the squarified algorithm
        customdata=customdata,
        hovertemplate=hovertemplates,
        pathbar=dict(visible=True)         # Show breadcrumb trail
    ))

    fig.update_layout(
        margin=dict(t=50, l=25, r=25, b=25),
        title="File System Treemap"
    )
    return fig


async def scan_directory(directory, stop_flag_state,
                         progress=gr.Progress(track_tqdm=True)):
    """
    Asynchronously scans a directory, yielding updates to the UI.

    Args:
        directory: Path typed by the user.
        stop_flag_state: Session dict with a boolean 'stop' entry; it is
            reset here and polled while scanning.
        progress: Gradio progress tracker (injected by Gradio).

    Yields:
        (figure, status message, DataFrame) tuples: one every
        UPDATE_INTERVAL accepted files, plus a final one when the walk ends
        or is stopped.
    """
    if not directory or not os.path.isdir(directory):
        empty = pd.DataFrame()
        yield create_treemap_figure(empty), "Invalid directory path.", empty
        return

    file_list = []
    processed_count = 0

    # Reset stop flag at the beginning of a new scan
    stop_flag_state['stop'] = False

    progress(0, desc="Starting scan...")

    for root, _, files in os.walk(directory, topdown=True):
        if stop_flag_state['stop']:
            progress(1.0, desc="Scan stopped by user.")
            break

        for name in files:
            info = get_file_info(os.path.join(root, name), directory)
            if info is None:
                continue

            file_list.append(info)
            processed_count += 1

            # Yield updates periodically to keep the UI responsive. Only
            # done right after a file was accepted, so skipped files cannot
            # re-trigger the same update.
            if processed_count % UPDATE_INTERVAL == 0:
                df = pd.DataFrame(file_list, columns=_FILE_COLUMNS)
                yield (
                    create_treemap_figure(df),
                    f"Scanned {processed_count} files...",
                    df,
                )
                await asyncio.sleep(0.01)  # Allow other tasks to run

                # Honour a stop request without finishing a huge directory;
                # the outer loop sees the flag and reports the stop.
                if stop_flag_state['stop']:
                    break

    # Final update after loop finishes or is stopped
    df = pd.DataFrame(file_list, columns=_FILE_COLUMNS)
    final_status = f"Scan complete. Found {len(df)} files."
    if stop_flag_state['stop']:
        final_status = f"Scan stopped. Displaying {len(df)} found files."

    yield create_treemap_figure(df), final_status, df


def stop_scan(stop_flag_state):
    """Sets the stop flag to True and returns it with a status message."""
    stop_flag_state['stop'] = True
    return stop_flag_state, "Stopping scan..."


# --- Gradio UI ---
with gr.Blocks(theme=gr.themes.Soft(), title="File System Treemap") as app:
    stop_flag = gr.State({'stop': False})

    gr.Markdown("# 📁 File System Treemap Visualizer")
    gr.Markdown(
        "Enter a directory path to generate a squarified treemap. "
        "The visualization will build in real-time."
    )

    with gr.Row():
        path_input = gr.Textbox(
            label="Directory Path",
            placeholder="e.g., C:\\Users\\YourUser\\Documents",
            scale=3
        )
        start_button = gr.Button("Start Scan", variant="primary", scale=1)
        stop_button = gr.Button("Stop Scan", variant="stop", scale=1)

    status_label = gr.Label("Status: Ready")

    with gr.Tabs():
        with gr.TabItem("Treemap Visualization"):
            # gr.Plot has no `interactive` option; Plotly figures are
            # interactive in the browser on their own.
            plot_output = gr.Plot()
        with gr.TabItem("Data Table"):
            data_output = gr.DataFrame(wrap=True)

    # Event Handlers
    # Keep the event returned by .click(): `cancels` needs that event
    # object, not the bound `start_button.click` method.
    scan_event = start_button.click(
        fn=scan_directory,
        inputs=[path_input, stop_flag],
        outputs=[plot_output, status_label, data_output]
    )

    stop_button.click(
        fn=stop_scan,
        inputs=[stop_flag],
        outputs=[stop_flag, status_label],
        cancels=[scan_event]  # This cancels the running 'scan_directory' event
    )

if __name__ == "__main__":
    app.launch()