import asyncio
import os
import pathlib
import re
from datetime import datetime

import gradio as gr
import pandas as pd
import plotly.graph_objects as go
|
|
|
|
|
|
|
# Number of newly collected files between two intermediate UI refreshes
# during a scan (the scan loop tests the running file count modulo this).
UPDATE_INTERVAL = 250
|
|
|
def parse_filename_words(filename):
    """
    Return the runs of ASCII letters found in a filename as one string.

    Digits, punctuation and every other non-letter character act as
    separators and are discarded. The runs are joined with ", "; when the
    name contains no ASCII letters at all the placeholder "N/A" is returned.

    Example: "Aarons123File-482.md" -> "Aarons, File, md"
    """
    letter_runs = [match.group(0) for match in re.finditer(r'[A-Za-z]+', filename)]
    if not letter_runs:
        return "N/A"
    return ", ".join(letter_runs)
|
|
|
def get_file_info(path, root_path):
    """
    Gather the treemap/table metadata for a single non-empty file.

    Args:
        path: Path of the file to inspect.
        root_path: Directory the scan started from; the file's position in
            the hierarchy is computed relative to it.

    Returns:
        A dict with the keys 'path', 'label', 'parent', 'size',
        'color_group', 'created', 'modified' and 'keywords', or None when
        `path` is not a file, is empty (0 bytes), or cannot be accessed.
    """
    try:
        if not os.path.isfile(path):
            return None

        stat = os.stat(path)
        size = stat.st_size

        # Zero-byte files are skipped; they would be sent to the treemap as
        # zero-valued entries.
        if size == 0:
            return None

        # normpath drops a trailing separator so basename() is not empty;
        # fall back to the raw root for paths such as "/".
        root_name = os.path.basename(os.path.normpath(root_path)) or root_path

        try:
            relative_parts = pathlib.Path(os.path.relpath(path, root_path)).parts
        except ValueError:
            # relpath() cannot relate the two paths (e.g. different Windows
            # drives): treat the file as sitting directly in the root.
            relative_parts = (os.path.basename(path),)

        if len(relative_parts) > 1:
            top_level_dir = relative_parts[0]
            parent_path = str(pathlib.Path(*relative_parts[:-1]))
        else:
            # A file directly inside the scan root has no sub-directory of
            # its own, so it is grouped under the root's name rather than
            # under its own filename.
            top_level_dir = root_name
            parent_path = root_name

        # st_ctime is only the creation time on some platforms; prefer the
        # real birth time when the OS reports one.
        created_ts = getattr(stat, 'st_birthtime', stat.st_ctime)

        return {
            'path': path,
            'label': os.path.basename(path),
            'parent': parent_path,
            'size': size,
            'color_group': top_level_dir,
            'created': datetime.fromtimestamp(created_ts).strftime('%Y-%m-%d %H:%M:%S'),
            'modified': datetime.fromtimestamp(stat.st_mtime).strftime('%Y-%m-%d %H:%M:%S'),
            'keywords': parse_filename_words(os.path.basename(path)),
        }
    except OSError:
        # Covers FileNotFoundError, PermissionError, etc.: the scan is
        # best-effort, so unreadable entries are simply skipped.
        return None
|
|
|
def create_treemap_figure(df):
    """
    Generate the Plotly treemap figure from a DataFrame of file info.

    The DataFrame holds one row per file, but a treemap requires every
    non-empty entry of ``parents`` to be the id of another node. Directory
    nodes are therefore synthesised here from each file's 'parent' path,
    one node per path prefix, and sized with the total of the files below
    them.

    Args:
        df: DataFrame with the columns 'path', 'label', 'parent', 'size',
            'modified', 'created' and 'keywords' (as built by
            get_file_info). May be empty.

    Returns:
        A go.Figure. For an empty DataFrame this is a single-tile
        placeholder treemap.
    """
    if df.empty:
        return go.Figure(go.Treemap(
            labels=["Your scan will appear here."],
            parents=[""],
            values=[1],
        ))

    def dir_id(parts):
        # The trailing separator keeps a directory id from colliding with a
        # file id (file paths never end in a separator). No parts -> "",
        # which Plotly treats as "attached to the root".
        return os.sep.join(parts) + os.sep if parts else ""

    dir_sizes = {}      # tuple of path parts -> accumulated size in bytes
    file_parents = []   # parent id for every file row, in row order
    for parent, size in zip(df['parent'], df['size']):
        # "" and "." both produce no parts, i.e. a top-level file.
        parts = pathlib.PurePath(str(parent)).parts
        for depth in range(1, len(parts) + 1):
            prefix = parts[:depth]
            dir_sizes[prefix] = dir_sizes.get(prefix, 0) + int(size)
        file_parents.append(dir_id(parts))

    dir_keys = list(dir_sizes)
    ids = [dir_id(key) for key in dir_keys] + df['path'].tolist()
    labels = [key[-1] for key in dir_keys] + df['label'].tolist()
    parents = [dir_id(key[:-1]) for key in dir_keys] + file_parents
    values = [dir_sizes[key] for key in dir_keys] + [int(size) for size in df['size']]
    # Directories have no timestamps/keywords of their own; show a dash.
    customdata = (
        [[dir_sizes[key], "-", "-", "-"] for key in dir_keys]
        + df[['size', 'modified', 'created', 'keywords']].values.tolist()
    )

    # No explicit marker colours: with a real hierarchy Plotly is expected
    # to take a colour from the default colourway for each top-level branch
    # and let descendants inherit it, which gives per-top-level-directory
    # colouring (NOTE(review): confirm against the Plotly version in use).
    fig = go.Figure(go.Treemap(
        ids=ids,
        labels=labels,
        parents=parents,
        values=values,
        # Directory values already include all descendants.
        branchvalues='total',
        tiling=dict(packing='squarify'),
        customdata=customdata,
        hovertemplate=(
            "<b>%{label}</b><br>"
            "Size: %{customdata[0]:.2s}B<br>"
            "Modified: %{customdata[1]}<br>"
            "Created: %{customdata[2]}<br>"
            "Keywords: %{customdata[3]}<br>"
            "Path: %{id}<extra></extra>"
        ),
        pathbar={'visible': True},
    ))

    fig.update_layout(
        margin=dict(t=50, l=25, r=25, b=25),
        title="File System Treemap",
    )
    return fig
|
|
|
async def scan_directory(directory, stop_flag_state, progress=gr.Progress(track_tqdm=True)):
    """
    Asynchronously scan a directory tree, yielding updates to the UI.

    Args:
        directory: Path of the directory to walk.
        stop_flag_state: Mutable dict holding a 'stop' key. It is reset to
            False when the scan starts and polled while scanning; setting
            it to True makes the scan end early with the files found so far.
        progress: Gradio progress tracker used for status messages.

    Yields:
        Tuples of (treemap figure, status text, DataFrame of file info):
        one after every UPDATE_INTERVAL collected files and a final one
        when the scan completes or is stopped. An invalid directory yields
        a single placeholder tuple.
    """
    if not directory or not os.path.isdir(directory):
        yield create_treemap_figure(pd.DataFrame()), "Invalid directory path.", pd.DataFrame()
        return

    file_list = []

    # Clear any stop request left over from a previous scan.
    stop_flag_state['stop'] = False

    progress(0, desc="Starting scan...")

    for root, _, files in os.walk(directory, topdown=True):
        if stop_flag_state['stop']:
            break

        for name in files:
            info = get_file_info(os.path.join(root, name), directory)
            if not info:
                continue
            file_list.append(info)

            # Only refresh when a file was actually added, so skipped
            # entries never trigger a redundant update.
            if len(file_list) % UPDATE_INTERVAL == 0:
                df = pd.DataFrame(file_list)
                yield create_treemap_figure(df), f"Scanned {len(file_list)} files...", df
                # Hand control back to the event loop between updates.
                await asyncio.sleep(0.01)
                # Re-check here so a stop request is honoured inside a very
                # large directory, not only between directories; the outer
                # loop then breaks on its next iteration.
                if stop_flag_state['stop']:
                    break

    stopped = stop_flag_state['stop']
    if stopped:
        progress(1.0, "Scan stopped by user.")

    df = pd.DataFrame(file_list)
    if stopped:
        final_status = f"Scan stopped. Displaying {len(df)} found files."
    else:
        final_status = f"Scan complete. Found {len(df)} files."

    yield create_treemap_figure(df), final_status, df
|
|
|
def stop_scan(stop_flag_state):
    """
    Request that the running scan stop.

    Flips the 'stop' entry of the given state dict to True in place and
    returns that same dict together with the status text to display.
    """
    stop_flag_state.update(stop=True)
    status_message = "Stopping scan..."
    return stop_flag_state, status_message
|
|
|
|
|
# UI definition: a path box with start/stop buttons, a status label, and
# two tabs showing the treemap and the raw file table.
with gr.Blocks(theme=gr.themes.Soft(), title="File System Treemap") as app:

    # Mutable flag passed to both the scan and the stop handlers.
    stop_flag = gr.State({'stop': False})

    gr.Markdown("# π File System Treemap Visualizer")
    gr.Markdown("Enter a directory path to generate a squarified treemap. The visualization will build in real-time.")

    with gr.Row():
        path_input = gr.Textbox(
            label="Directory Path",
            placeholder="e.g., C:\\Users\\YourUser\\Documents",
            scale=3
        )
        start_button = gr.Button("Start Scan", variant="primary", scale=1)
        stop_button = gr.Button("Stop Scan", variant="stop", scale=1)

    status_label = gr.Label("Status: Ready")

    with gr.Tabs():
        with gr.TabItem("Treemap Visualization"):
            plot_output = gr.Plot(interactive=True)
        with gr.TabItem("Data Table"):
            data_output = gr.DataFrame(wrap=True)

    # Keep the event returned by .click(): `cancels` expects such event
    # objects, not the listener method itself.
    scan_event = start_button.click(
        fn=scan_directory,
        inputs=[path_input, stop_flag],
        outputs=[plot_output, status_label, data_output]
    )

    stop_button.click(
        fn=stop_scan,
        inputs=[stop_flag],
        outputs=[stop_flag, status_label],
        cancels=[scan_event]
    )


if __name__ == "__main__":
    app.launch()