Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,18 +1,24 @@
|
|
1 |
import gradio as gr
|
2 |
import pandas as pd
|
3 |
import sweetviz as sv
|
4 |
-
import
|
5 |
-
import
|
6 |
|
7 |
class DataAnalyzer:
|
8 |
def __init__(self):
|
|
|
9 |
self.current_df = None
|
10 |
|
11 |
def generate_sweetviz_report(self, df):
|
12 |
report = sv.analyze(df)
|
13 |
-
|
14 |
-
|
15 |
-
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
def get_dataset_info(self, df):
|
18 |
info_dict = {
|
@@ -20,9 +26,9 @@ class DataAnalyzer:
|
|
20 |
"Columns": len(df.columns),
|
21 |
"Memory Usage (MB)": round(df.memory_usage(deep=True).sum() / 1024**2, 2),
|
22 |
"Missing Values": int(df.isnull().sum().sum()),
|
23 |
-
"
|
24 |
}
|
25 |
-
return
|
26 |
|
27 |
def create_interface():
|
28 |
analyzer = DataAnalyzer()
|
@@ -32,32 +38,28 @@ def create_interface():
|
|
32 |
|
33 |
with gr.Row():
|
34 |
file_input = gr.File(label="Upload CSV")
|
35 |
-
|
36 |
|
37 |
report_html = gr.HTML()
|
38 |
|
39 |
def process_file(file):
|
40 |
if file is None:
|
41 |
-
return
|
42 |
|
43 |
try:
|
44 |
df = pd.read_csv(file.name)
|
45 |
info = analyzer.get_dataset_info(df)
|
46 |
report = analyzer.generate_sweetviz_report(df)
|
47 |
|
48 |
-
|
49 |
-
b64 = base64.b64encode(report.encode()).decode()
|
50 |
-
download_link = f'<a href="data:text/html;base64,{b64}" download="analysis_report.html">Download Report</a>'
|
51 |
-
|
52 |
-
return info, report + download_link
|
53 |
|
54 |
except Exception as e:
|
55 |
-
return
|
56 |
|
57 |
file_input.change(
|
58 |
fn=process_file,
|
59 |
inputs=[file_input],
|
60 |
-
outputs=[
|
61 |
)
|
62 |
|
63 |
return demo
|
|
|
1 |
import gradio as gr
|
2 |
import pandas as pd
|
3 |
import sweetviz as sv
|
4 |
+
import tempfile
|
5 |
+
import os
|
6 |
|
7 |
class DataAnalyzer:
|
8 |
def __init__(self):
    """Set up per-analyzer state: no dataframe yet, plus a scratch directory."""
    # No dataset has been loaded yet.
    self.current_df = None
    # Scratch directory created once per analyzer; report files are written here.
    self.temp_dir = tempfile.mkdtemp()
|
11 |
|
12 |
def generate_sweetviz_report(self, df):
    """Analyze ``df`` with Sweetviz and return the report as an HTML string.

    ``show_html`` is given a file path to write to, so the report is rendered
    to a uniquely named file inside ``self.temp_dir``, read back as UTF-8
    text, and the file is removed again before returning.

    Args:
        df: The dataframe passed to ``sv.analyze``.

    Returns:
        The full text of the generated HTML report.
    """
    report = sv.analyze(df)

    # Use a unique file per call: a fixed "report.html" in the shared
    # directory lets overlapping calls overwrite or read each other's output.
    fd, temp_path = tempfile.mkstemp(suffix=".html", dir=self.temp_dir)
    os.close(fd)  # only the path is needed; show_html is handed a path, not a handle
    try:
        report.show_html(temp_path, open_browser=False)
        with open(temp_path, "r", encoding="utf-8") as f:
            return f.read()
    finally:
        # Do not let report files accumulate in the scratch directory.
        os.remove(temp_path)
|
22 |
|
23 |
def get_dataset_info(self, df):
|
24 |
info_dict = {
|
|
|
26 |
"Columns": len(df.columns),
|
27 |
"Memory Usage (MB)": round(df.memory_usage(deep=True).sum() / 1024**2, 2),
|
28 |
"Missing Values": int(df.isnull().sum().sum()),
|
29 |
+
"Column Types": df.dtypes.astype(str).to_dict()
|
30 |
}
|
31 |
+
return info_dict
|
32 |
|
33 |
def create_interface():
|
34 |
analyzer = DataAnalyzer()
|
|
|
38 |
|
39 |
with gr.Row():
|
40 |
file_input = gr.File(label="Upload CSV")
|
41 |
+
dataset_info = gr.JSON(label="Dataset Information")
|
42 |
|
43 |
report_html = gr.HTML()
|
44 |
|
45 |
def process_file(file):
    """Load the uploaded CSV and return ``(dataset info, report HTML)``.

    Args:
        file: The uploaded file, either an object exposing the path as
            ``.name`` or a plain path string; ``None`` when nothing is
            uploaded.

    Returns:
        ``(None, None)`` when ``file`` is ``None``;
        ``(info, report)`` on success;
        ``({"error": message}, None)`` when loading or analysis fails.
    """
    if file is None:
        return None, None

    try:
        # Accept an object with ``.name`` (as before) or a bare path string.
        csv_path = getattr(file, "name", file)
        df = pd.read_csv(csv_path)
        info = analyzer.get_dataset_info(df)
        report = analyzer.generate_sweetviz_report(df)
        return info, report
    except Exception as e:
        # Deliberately broad: this is the UI callback boundary, so any failure
        # is reported back as data instead of propagating to the caller.
        return {"error": str(e)}, None
|
58 |
|
59 |
file_input.change(
|
60 |
fn=process_file,
|
61 |
inputs=[file_input],
|
62 |
+
outputs=[dataset_info, report_html]
|
63 |
)
|
64 |
|
65 |
return demo
|