import gradio as gr
import os
import pandas as pd
import plotly.graph_objects as go
import asyncio
from datetime import datetime
import re
import pathlib
# --- Configuration ---
# Number of successfully read files the scanner accumulates between UI
# refreshes. Every refresh rebuilds the DataFrame and the treemap figure from
# all files collected so far, so lower values give smoother live updates at the
# cost of more rebuild work.
UPDATE_INTERVAL = 250
# Compiled once at import time; matches one run of ASCII letters.
_LETTER_RUN = re.compile(r"[A-Za-z]+")


def parse_filename_words(filename):
    """Return the letter-only words found in *filename* as a comma-separated string.

    Digits, punctuation, and any other non-ASCII-letter characters act as
    separators and are dropped. The extension is treated like any other word.

    Example: "Aarons123File-482.md" -> "Aarons, File, md"

    Args:
        filename: File name (or any string) to split into words.

    Returns:
        The words joined with ", ", or "N/A" when the name contains no
        ASCII letters at all.
    """
    words = _LETTER_RUN.findall(filename)
    return ", ".join(words) if words else "N/A"
def get_file_info(path, root_path):
    """Collect the treemap/table record for a single file.

    Args:
        path: Path of the candidate file.
        root_path: Directory the scan started from; used to derive the
            file's relative location and its top-level folder.

    Returns:
        A dict with the keys 'path', 'label', 'parent', 'size',
        'color_group', 'created', 'modified' and 'keywords', or None when
        *path* is not a regular file, is empty (0 bytes), or cannot be read.
    """
    try:
        if not os.path.isfile(path):
            return None

        stat = os.stat(path)
        size = stat.st_size
        # Skip empty files
        if size == 0:
            return None

        # Name of the scanned folder itself; normpath drops a trailing
        # separator that would otherwise make basename() return ''.
        normalized_root = os.path.normpath(root_path)
        root_name = os.path.basename(normalized_root) or normalized_root

        try:
            relative_path = os.path.relpath(path, root_path)
        except ValueError:
            # relpath() cannot relate the two paths (e.g. different drives on
            # Windows). Treat the file as sitting directly under the root so
            # the code below still has a relative path to work with.
            relative_path = os.path.basename(path)

        parts = pathlib.PurePath(relative_path).parts
        if len(parts) > 1:
            # First component is the top-level folder used for colour
            # grouping; everything but the file name is the parent folder.
            top_level_dir = parts[0]
            parent_path = str(pathlib.Path(*parts[:-1]))
        else:
            # File lives directly in the scanned folder: group it under the
            # root rather than under its own file name.
            top_level_dir = root_name
            parent_path = root_name

        file_name = os.path.basename(path)
        return {
            'path': path,
            'label': file_name,
            'parent': parent_path,
            'size': size,
            'color_group': top_level_dir,
            # NOTE: st_ctime is the creation time on Windows but the last
            # metadata-change time on most Unix systems.
            'created': datetime.fromtimestamp(stat.st_ctime).strftime('%Y-%m-%d %H:%M:%S'),
            'modified': datetime.fromtimestamp(stat.st_mtime).strftime('%Y-%m-%d %H:%M:%S'),
            'keywords': parse_filename_words(file_name)
        }
    except OSError:
        # Covers FileNotFoundError / PermissionError: the file vanished or is
        # inaccessible, so it is simply left out of the scan.
        return None
# Qualitative palette cycled over the colour groups so that every top-level
# folder gets a valid CSS colour (folder names themselves are not colours).
_GROUP_PALETTE = (
    "#636EFA", "#EF553B", "#00CC96", "#AB63FA", "#FFA15A",
    "#19D3F3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52",
)
_ROOT_COLOR = "lightgray"
# Placeholder shown in the hover box for fields a directory does not have.
_NO_VALUE = "-"


def _ancestor_dirs(directory, root_dir):
    """Yield *directory* and each parent above it, stopping at *root_dir*.

    Also stops at the filesystem root (where dirname() no longer changes the
    path), which is what happens when *root_dir* is None.
    """
    current = directory
    while True:
        yield current
        parent = os.path.dirname(current)
        if current == root_dir or parent == current:
            return
        current = parent


def _build_treemap_nodes(df):
    """Expand the file rows of *df* into directory + file nodes for Plotly.

    Returns a dict of parallel lists ('ids', 'labels', 'parents', 'values',
    'colors', 'customdata') in which every non-empty parent is also an id.
    """
    file_ids = [os.path.abspath(p) for p in df['path']]
    file_dirs = [os.path.dirname(p) for p in file_ids]
    try:
        root_dir = os.path.commonpath(file_dirs)
    except ValueError:
        # No common ancestor (e.g. several drives): let each chain run up to
        # its own filesystem root and show them as separate roots.
        root_dir = None

    # Stable colour per group, in order of first appearance.
    groups = list(dict.fromkeys(df['color_group']))
    color_of = {
        group: _GROUP_PALETTE[index % len(_GROUP_PALETTE)]
        for index, group in enumerate(groups)
    }

    # Directory totals: each file's size is added to every ancestor so the
    # values are consistent with branchvalues='total'.
    dir_totals = {}
    dir_colors = {}
    for directory, size, group in zip(file_dirs, df['size'], df['color_group']):
        for ancestor in _ancestor_dirs(directory, root_dir):
            dir_totals[ancestor] = dir_totals.get(ancestor, 0) + int(size)
            dir_colors.setdefault(ancestor, color_of[group])
    if root_dir is not None:
        dir_colors[root_dir] = _ROOT_COLOR

    nodes = {
        'ids': [], 'labels': [], 'parents': [],
        'values': [], 'colors': [], 'customdata': [],
    }

    for directory, total in dir_totals.items():
        parent = os.path.dirname(directory)
        is_top = directory == root_dir or parent == directory
        nodes['ids'].append(directory)
        nodes['labels'].append(os.path.basename(directory) or directory)
        nodes['parents'].append('' if is_top else parent)
        nodes['values'].append(total)
        nodes['colors'].append(dir_colors[directory])
        nodes['customdata'].append([total, _NO_VALUE, _NO_VALUE, _NO_VALUE])

    file_columns = zip(
        file_ids, file_dirs, df['label'], df['size'], df['color_group'],
        df['modified'], df['created'], df['keywords'],
    )
    for file_id, file_dir, label, size, group, modified, created, keywords in file_columns:
        nodes['ids'].append(file_id)
        nodes['labels'].append(label)
        nodes['parents'].append(file_dir)
        nodes['values'].append(int(size))
        nodes['colors'].append(color_of[group])
        nodes['customdata'].append([int(size), modified, created, keywords])

    return nodes


def create_treemap_figure(df):
    """Generate the Plotly treemap figure from a DataFrame of file info.

    Args:
        df: DataFrame with one row per file and the columns 'path', 'label',
            'size', 'color_group', 'modified', 'created' and 'keywords'.
            An empty DataFrame produces a placeholder figure.

    Returns:
        A plotly.graph_objects.Figure holding a single Treemap trace. Files
        are nested under nodes for their containing directories, coloured by
        their 'color_group', and sized by 'size'.
    """
    if df.empty:
        return go.Figure(go.Treemap(
            labels=["Your scan will appear here."],
            parents=[""],
            values=[1]
        ))

    # Every parent must also appear as an id for Plotly to build the
    # hierarchy, so directory nodes are synthesised from the file paths.
    nodes = _build_treemap_nodes(df)

    fig = go.Figure(go.Treemap(
        ids=nodes['ids'],
        labels=nodes['labels'],
        parents=nodes['parents'],
        values=nodes['values'],
        branchvalues='total',  # Directory values already include their children
        marker_colors=nodes['colors'],  # Color by top-level folder
        tiling=dict(packing='squarify'),  # Use the squarified algorithm
        customdata=nodes['customdata'],
        hovertemplate=(
            "%{label}<br>"
            "Size: %{customdata[0]:.2s}B<br>"
            "Modified: %{customdata[1]}<br>"
            "Created: %{customdata[2]}<br>"
            "Keywords: %{customdata[3]}<br>"
            "Path: %{id}"
        ),
        pathbar={'visible': True}  # Show breadcrumb trail
    ))
    fig.update_layout(
        margin=dict(t=50, l=25, r=25, b=25),
        title="File System Treemap"
    )
    return fig
async def scan_directory(directory, stop_flag_state, progress=gr.Progress(track_tqdm=True)):
    """Walk *directory* and stream treemap/table updates to the UI.

    This is an async generator. Each yielded item is a 3-tuple
    ``(figure, status_text, dataframe)`` matching the three output
    components wired to the start button.

    Args:
        directory: Path of the folder to scan.
        stop_flag_state: Mutable dict with a boolean 'stop' entry. It is
            reset to False when the scan starts and polled during the walk.
        progress: Gradio progress tracker.

    Yields:
        An update every UPDATE_INTERVAL collected files, then one final
        update when the walk finishes or is stopped. For an invalid
        directory a single placeholder update is yielded.
    """
    if not directory or not os.path.isdir(directory):
        yield create_treemap_figure(pd.DataFrame()), "Invalid directory path.", pd.DataFrame()
        return

    file_list = []
    # Reset stop flag at the beginning of a new scan
    stop_flag_state['stop'] = False
    progress(0, desc="Starting scan...")

    for root, _, files in os.walk(directory, topdown=True):
        if stop_flag_state['stop']:
            break
        for name in files:
            # Also checked per file so a single huge folder can be interrupted.
            if stop_flag_state['stop']:
                break
            info = get_file_info(os.path.join(root, name), directory)
            if not info:
                continue
            file_list.append(info)
            # Yield updates periodically to keep the UI responsive
            if len(file_list) % UPDATE_INTERVAL == 0:
                df = pd.DataFrame(file_list)
                yield create_treemap_figure(df), f"Scanned {len(file_list)} files...", df
                await asyncio.sleep(0.01)  # Allow other tasks to run

    # Final update after loop finishes or is stopped
    df = pd.DataFrame(file_list)
    if stop_flag_state['stop']:
        progress(1.0, "Scan stopped by user.")
        final_status = f"Scan stopped. Displaying {len(df)} found files."
    else:
        final_status = f"Scan complete. Found {len(df)} files."
    yield create_treemap_figure(df), final_status, df
def stop_scan(stop_flag_state):
    """Request that the running scan stop.

    Args:
        stop_flag_state: Mutable dict with a 'stop' entry; it is updated in
            place.

    Returns:
        A 2-tuple ``(stop_flag_state, status_text)``: the same dict with
        'stop' set to True, and the message "Stopping scan...".
    """
    stop_flag_state['stop'] = True
    return stop_flag_state, "Stopping scan..."
# --- Gradio UI ---
# Layout: a row with the path box and start/stop buttons, a status label, and
# two tabs (treemap plot, raw data table) that the scan fills in.
with gr.Blocks(theme=gr.themes.Soft(), title="File System Treemap") as app:
    # Per-session mutable flag shared between the scan and the stop handler.
    stop_flag = gr.State({'stop': False})

    gr.Markdown("# 📁 File System Treemap Visualizer")
    gr.Markdown("Enter a directory path to generate a squarified treemap. The visualization will build in real-time.")

    with gr.Row():
        path_input = gr.Textbox(
            label="Directory Path",
            placeholder="e.g., C:\\Users\\YourUser\\Documents",
            scale=3
        )
        start_button = gr.Button("Start Scan", variant="primary", scale=1)
        stop_button = gr.Button("Stop Scan", variant="stop", scale=1)

    status_label = gr.Label("Status: Ready")

    with gr.Tabs():
        with gr.TabItem("Treemap Visualization"):
            plot_output = gr.Plot()
        with gr.TabItem("Data Table"):
            data_output = gr.DataFrame(wrap=True)

    # Event Handlers
    # Keep the event returned by .click(): `cancels` needs that object, not
    # the bound `start_button.click` method.
    scan_event = start_button.click(
        fn=scan_directory,
        inputs=[path_input, stop_flag],
        outputs=[plot_output, status_label, data_output]
    )
    stop_button.click(
        fn=stop_scan,
        inputs=[stop_flag],
        outputs=[stop_flag, status_label],
        cancels=[scan_event]  # This cancels the running 'scan_directory' event
    )

if __name__ == "__main__":
    app.launch()