File size: 6,421 Bytes
5168b60 9fa2ea2 5168b60 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 |
import gradio as gr
import os
import pandas as pd
import plotly.graph_objects as go
import asyncio
from datetime import datetime
import re
import pathlib
# --- Configuration ---
# Number of successfully processed files between UI refreshes: scan_directory
# rebuilds and yields the figure/table each time its processed-file count
# reaches a multiple of this value.
UPDATE_INTERVAL = 250
def parse_filename_words(filename):
    """
    Extracts contiguous groups of letters from a filename, ignoring numbers and symbols.
    Example: "Aarons123File-482.md" -> "Aarons, File, md"

    Letters are detected with str.isalpha, so non-ASCII letters (accented,
    Cyrillic, CJK, ...) are kept instead of being split or dropped.

    Args:
        filename: The file name (not a full path) to extract words from.

    Returns:
        The letter groups joined by ", ", or "N/A" when the name contains
        no letters at all.
    """
    # Blank out every non-letter, then let split() collapse the gaps; this
    # yields exactly the maximal runs of letters, in order.
    words = "".join(ch if ch.isalpha() else " " for ch in filename).split()
    return ", ".join(words) if words else "N/A"
def get_file_info(path, root_path):
    """
    Gathers required information for a single file.

    Args:
        path: Path of the candidate file.
        root_path: Directory the scan started from; used to locate the file
            relative to the scan root.

    Returns:
        A dict with the keys 'path', 'label', 'parent', 'size',
        'color_group', 'created', 'modified' and 'keywords', or None if
        `path` is not a file, is empty (0 bytes) or is inaccessible.
    """
    time_format = '%Y-%m-%d %H:%M:%S'
    try:
        if not os.path.isfile(path):
            return None
        stat = os.stat(path)
        size = stat.st_size
        # Skip empty files
        if size == 0:
            return None

        # Name of the scan root itself; normpath drops a trailing separator
        # that would otherwise make basename() return "".
        root_name = os.path.basename(os.path.normpath(root_path)) or root_path

        try:
            rel_parts = pathlib.Path(os.path.relpath(path, root_path)).parts
        except ValueError:
            # relpath cannot relate the two paths (e.g. different drives on
            # Windows): treat the file as sitting directly in the root.
            rel_parts = (os.path.basename(path),)

        if len(rel_parts) > 1:
            # First path component is the top-level folder used for color
            # grouping; everything but the file name is the parent directory.
            top_level_dir = rel_parts[0]
            parent_path = str(pathlib.Path(*rel_parts[:-1]))
        else:
            # File lies directly in the scan root: group it under the root
            # folder's name rather than under its own file name.
            top_level_dir = root_name
            parent_path = root_name

        # st_birthtime is only provided on some platforms; elsewhere fall
        # back to st_ctime (which on Unix is the last metadata change).
        created_ts = getattr(stat, 'st_birthtime', stat.st_ctime)
        file_name = os.path.basename(path)

        return {
            'path': path,
            'label': file_name,
            'parent': parent_path,
            'size': size,
            'color_group': top_level_dir,
            'created': datetime.fromtimestamp(created_ts).strftime(time_format),
            'modified': datetime.fromtimestamp(stat.st_mtime).strftime(time_format),
            'keywords': parse_filename_words(file_name)
        }
    except OSError:
        # Covers FileNotFoundError/PermissionError etc.: the file vanished or
        # cannot be read, so it is simply left out of the scan.
        return None
def _dir_node_id(rel_dir):
    """Return the treemap id of a directory node (its path with a trailing separator)."""
    # The trailing separator keeps directory ids distinct from file ids.
    return rel_dir if rel_dir.endswith(os.sep) else rel_dir + os.sep


def _ancestor_dirs(rel_dir):
    """Yield (directory, parent_directory) pairs from rel_dir up to its topmost ancestor."""
    node = os.path.normpath(rel_dir) if rel_dir else ""
    while node:
        upper = os.path.dirname(node)
        if upper == node:
            # dirname() is a fixed point for filesystem roots ("/", "C:\\").
            upper = ""
        yield node, upper
        node = upper


def create_treemap_figure(df):
    """
    Generates the Plotly treemap figure from a DataFrame of file info.

    Args:
        df: DataFrame with one row per file and the columns 'path', 'label',
            'parent', 'size', 'color_group', 'modified', 'created' and
            'keywords'. May be empty.

    Returns:
        A plotly Figure. For an empty DataFrame this is a single-tile
        placeholder; otherwise a squarified treemap in which every directory
        named in 'parent' (and each of its ancestors) is a branch node and
        every file is a leaf, colored by its 'color_group'.
    """
    if df.empty:
        return go.Figure(go.Treemap(
            labels=["Your scan will appear here."],
            parents=[""],
            values=[1]
        ))

    # One fixed color per top-level group, cycling if there are more groups
    # than palette entries.
    palette = (
        '#636EFA', '#EF553B', '#00CC96', '#AB63FA', '#FFA15A',
        '#19D3F3', '#FF6692', '#B6E880', '#FF97FF', '#FECB52',
    )
    group_colors = {}

    def color_for(group):
        return group_colors.setdefault(group, palette[len(group_colors) % len(palette)])

    # Plotly only links a node to its parent when the parent string matches
    # another node's id, so directory nodes have to be created explicitly.
    dir_totals = {}   # directory -> total bytes of all files beneath it
    dir_parents = {}  # directory -> its parent directory ("" for top level)
    dir_groups = {}   # directory -> color group of the first file seen in it
    for parent, size, group in zip(df['parent'], df['size'], df['color_group']):
        for node, upper in _ancestor_dirs(parent):
            dir_totals[node] = dir_totals.get(node, 0) + int(size)
            dir_parents[node] = upper
            dir_groups.setdefault(node, group)

    # Directory (branch) nodes first ...
    ids = [_dir_node_id(node) for node in dir_totals]
    labels = [os.path.basename(node) or node for node in dir_totals]
    parents = [
        _dir_node_id(dir_parents[node]) if dir_parents[node] else ""
        for node in dir_totals
    ]
    values = list(dir_totals.values())
    colors = [color_for(dir_groups[node]) for node in dir_totals]
    # Directories have no timestamps/keywords of their own.
    customdata = [[total, "-", "-", "-"] for total in values]

    # ... then the file (leaf) nodes.
    ids.extend(df['path'])
    labels.extend(df['label'])
    parents.extend(
        _dir_node_id(os.path.normpath(parent)) if parent else ""
        for parent in df['parent']
    )
    values.extend(int(size) for size in df['size'])
    colors.extend(color_for(group) for group in df['color_group'])
    customdata.extend(df[['size', 'modified', 'created', 'keywords']].values.tolist())

    fig = go.Figure(go.Treemap(
        ids=ids,
        labels=labels,
        parents=parents,
        values=values,
        # Directory values already include their children.
        branchvalues='total',
        marker_colors=colors,  # Color by top-level folder
        tiling=dict(packing='squarify'),  # Use the squarified algorithm
        customdata=customdata,
        hovertemplate=(
            "<b>%{label}</b><br>"
            "Size: %{customdata[0]:.2s}B<br>"
            "Modified: %{customdata[1]}<br>"
            "Created: %{customdata[2]}<br>"
            "Keywords: %{customdata[3]}<br>"
            "Path: %{id}<extra></extra>"
        ),
        pathbar={'visible': True}  # Show breadcrumb trail
    ))
    fig.update_layout(
        margin=dict(t=50, l=25, r=25, b=25),
        title="File System Treemap"
    )
    return fig
async def scan_directory(directory, stop_flag_state, progress=gr.Progress(track_tqdm=True)):
    """
    Asynchronously scans a directory, yielding updates to the UI.

    Args:
        directory: Path of the directory to walk.
        stop_flag_state: Dict with a 'stop' key. It is reset to False when
            the scan starts and polled during the walk; once it is True the
            scan ends early.
        progress: Gradio progress tracker used for the start/stop messages.

    Yields:
        Tuples of (treemap figure, status text, DataFrame of file records):
        one every UPDATE_INTERVAL processed files, and a final one when the
        walk finishes or is stopped. An invalid `directory` yields a single
        placeholder tuple with an error status.
    """
    if not directory or not os.path.isdir(directory):
        yield create_treemap_figure(pd.DataFrame()), "Invalid directory path.", pd.DataFrame()
        return

    file_list = []
    # Reset stop flag at the beginning of a new scan
    stop_flag_state['stop'] = False
    progress(0, desc="Starting scan...")

    for root, _, files in os.walk(directory, topdown=True):
        if stop_flag_state['stop']:
            break
        for name in files:
            # Also poll inside the file loop so that a single very large
            # directory cannot delay a stop request until it is exhausted.
            if stop_flag_state['stop']:
                break
            info = get_file_info(os.path.join(root, name), directory)
            if not info:
                continue
            file_list.append(info)
            # Yield updates periodically to keep the UI responsive
            if len(file_list) % UPDATE_INTERVAL == 0:
                df = pd.DataFrame(file_list)
                yield create_treemap_figure(df), f"Scanned {len(file_list)} files...", df
                await asyncio.sleep(0.01)  # Allow other tasks to run

    # Final update after loop finishes or is stopped
    df = pd.DataFrame(file_list)
    if stop_flag_state['stop']:
        progress(1.0, "Scan stopped by user.")
        final_status = f"Scan stopped. Displaying {len(df)} found files."
    else:
        final_status = f"Scan complete. Found {len(df)} files."
    yield create_treemap_figure(df), final_status, df
def stop_scan(stop_flag_state):
    """
    Flags the current scan for termination.

    Args:
        stop_flag_state: Dict holding the 'stop' flag; modified in place.

    Returns:
        A tuple of the same (now flagged) dict and the status message to show.
    """
    status_message = "Stopping scan..."
    stop_flag_state.update(stop=True)
    return stop_flag_state, status_message
# --- Gradio UI ---
# Layout: a path box with Start/Stop buttons, a status label, and two tabs
# showing the treemap and the underlying table of file records.
with gr.Blocks(theme=gr.themes.Soft(), title="File System Treemap") as app:
    # Shared flag dict passed to both the scan and the stop handler.
    stop_flag = gr.State({'stop': False})

    gr.Markdown("# 📁 File System Treemap Visualizer")
    gr.Markdown("Enter a directory path to generate a squarified treemap. The visualization will build in real-time.")

    with gr.Row():
        path_input = gr.Textbox(
            label="Directory Path",
            placeholder="e.g., C:\\Users\\YourUser\\Documents",
            scale=3
        )
        start_button = gr.Button("Start Scan", variant="primary", scale=1)
        stop_button = gr.Button("Stop Scan", variant="stop", scale=1)

    status_label = gr.Label("Status: Ready")

    with gr.Tabs():
        with gr.TabItem("Treemap Visualization"):
            plot_output = gr.Plot(interactive=True)
        with gr.TabItem("Data Table"):
            data_output = gr.DataFrame(wrap=True)

    # Event Handlers
    # Keep the event object returned by .click(): `cancels` expects registered
    # events, not the `start_button.click` method itself.
    scan_event = start_button.click(
        fn=scan_directory,
        inputs=[path_input, stop_flag],
        outputs=[plot_output, status_label, data_output]
    )
    stop_button.click(
        fn=stop_scan,
        inputs=[stop_flag],
        outputs=[stop_flag, status_label],
        cancels=[scan_event]  # This cancels the running 'scan_directory' event
    )

if __name__ == "__main__":
    # Generator handlers and event cancellation run through Gradio's queue;
    # enabling it explicitly is harmless where it is already on by default.
    app.queue()
    app.launch()