File size: 5,533 Bytes
0cb60c7
 
 
67f471c
 
771365f
 
 
 
947739b
 
7617875
 
0cb60c7
 
c9d2489
67f471c
771365f
947739b
771365f
8d8e69e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
947739b
8d8e69e
7617875
947739b
 
 
 
 
8d8e69e
 
 
947739b
 
 
8d8e69e
947739b
 
 
 
8d8e69e
 
 
947739b
 
8d8e69e
7617875
 
 
 
 
 
 
 
8d8e69e
7617875
 
 
 
 
8d8e69e
7617875
 
8d8e69e
947739b
 
8d8e69e
947739b
 
 
 
0cb60c7
 
 
 
830b865
0cb60c7
8d8e69e
 
 
 
 
 
 
 
 
 
276ed24
7617875
276ed24
 
947739b
8d8e69e
 
 
 
7617875
8d8e69e
 
 
 
 
 
 
 
7617875
9138597
0cb60c7
7617875
0cb60c7
 
 
7617875
947739b
 
7617875
 
947739b
7617875
947739b
 
 
 
0cb60c7
8d8e69e
7617875
0cb60c7
 
9138597
7617875
771365f
7617875
0cb60c7
 
 
 
9a72b36
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
import gradio as gr
import pandas as pd
import sweetviz as sv
import tempfile
import os
import category_encoders as ce
import umap
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from autoviz.AutoViz_Class import AutoViz_Class
import shutil
import warnings
warnings.filterwarnings('ignore')

class DataAnalyzer:
    """Generate Sweetviz and AutoViz HTML reports for a pandas DataFrame.

    Intermediate report files are written beneath a per-instance scratch
    directory created with ``tempfile.mkdtemp()`` and are removed again once
    their contents have been read back into memory.
    """

    def __init__(self):
        self.temp_dir = tempfile.mkdtemp()  # scratch directory for generated report files
        self.df = None  # last DataFrame handed to generate_sweetviz_report
        self.AV = AutoViz_Class()

    def generate_sweetviz_report(self, df):
        """Run Sweetviz on *df* and return the report wrapped in an HTML table.

        Args:
            df: DataFrame to analyze; it is also stored on ``self.df``.

        Returns:
            A string containing the Sweetviz report HTML embedded in a
            scrollable, single-cell table.
        """
        self.df = df
        report = sv.analyze(df)
        report_path = os.path.join(self.temp_dir, "report.html")

        try:
            report.show_html(report_path, open_browser=False)
            with open(report_path, 'r', encoding='utf-8') as f:
                html_content = f.read()
        finally:
            # Remove the temporary report even when writing or reading it
            # fails, so failed runs do not leave files behind.
            if os.path.exists(report_path):
                os.remove(report_path)

        html_with_table = f"""
        <table width="100%" style="border-collapse: collapse;">
            <tr>
                <td style="padding: 20px; border: 1px solid #ddd;">
                    <div style="height: 800px; overflow: auto;">
                        {html_content}
                    </div>
                </td>
            </tr>
        </table>
        """

        return html_with_table

    @staticmethod
    def _collect_html_files(directory):
        """Return the contents of every non-empty ``.html`` file under *directory*.

        The tree is searched recursively because the charts may be written
        into a sub-folder of the requested output directory rather than
        directly into it. Files are visited in sorted order, parent folders
        before their children, so the result is deterministic. Unreadable
        files are reported with ``print`` and skipped. A missing directory
        yields an empty list.
        """
        html_parts = []
        for current_dir, subdirs, files in os.walk(directory):
            subdirs.sort()  # in-place sort fixes the order os.walk descends in
            for file in sorted(files):
                if not file.endswith('.html'):
                    continue
                file_path = os.path.join(current_dir, file)
                try:
                    with open(file_path, 'r', encoding='utf-8') as f:
                        content = f.read()
                except (OSError, UnicodeError) as e:
                    # Best effort: one bad file must not discard the others.
                    print(f"Error reading file {file}: {str(e)}")
                    continue
                if content.strip():
                    html_parts.append(content)
        return html_parts

    def generate_autoviz_report(self, df):
        """Run AutoViz on *df* and return the generated charts as one HTML string.

        Args:
            df: DataFrame passed directly to AutoViz via ``dfte``.

        Returns:
            The HTML of all generated charts joined by ``<br><hr><br>``, a
            fixed notice when no chart was produced, or an
            ``"Error in AutoViz: ..."`` message when anything raised.
        """
        viz_temp_dir = os.path.join(self.temp_dir, "autoviz_output")
        # Start from an empty output directory so charts from a previous run
        # can never leak into this report.
        if os.path.exists(viz_temp_dir):
            shutil.rmtree(viz_temp_dir)
        os.makedirs(viz_temp_dir)

        try:
            plt.close('all')  # Close any existing plots
            self.AV.AutoViz(
                filename='',
                sep=',',
                depVar='',
                dfte=df,  # Pass DataFrame directly
                header=0,
                verbose=0,
                lowess=False,
                chart_format='html',
                max_rows_analyzed=5000,
                max_cols_analyzed=30,
                save_plot_dir=viz_temp_dir
            )

            html_parts = self._collect_html_files(viz_temp_dir)
            if not html_parts:
                return "No visualizations were generated. Please check your data."

            return "<br><hr><br>".join(html_parts)

        except Exception as e:
            return f"Error in AutoViz: {str(e)}"
        finally:
            # ignore_errors: a cleanup failure must not replace the result
            # (or the error message) being returned above.
            shutil.rmtree(viz_temp_dir, ignore_errors=True)

def create_interface():
    """Assemble the Gradio Blocks dashboard and return it without launching.

    The output components (data preview, the two HTML report panes and the
    categorical-column dropdown) are created before the tab container. A
    single ``change`` handler on the CSV upload fills all four of them. The
    encoder dropdown and the UMAP image are created here, but no event
    handler is attached to them in this function.
    """
    analyzer = DataAnalyzer()

    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        gr.Markdown("# Data Analysis Dashboard")

        # Outputs are instantiated ahead of the tabs.
        data_preview = gr.Dataframe(label="Data Preview")
        report_html = gr.HTML(label="Sweetviz Report")
        autoviz_html = gr.HTML(label="AutoViz Report")
        column_dropdown = gr.Dropdown(
            label="Select Categorical Column",
            choices=[],
            interactive=True
        )

        with gr.Tabs():
            with gr.TabItem("Data Upload & Preview"):
                file_input = gr.File(label="Upload CSV")

            with gr.TabItem("AutoViz Analysis"):
                gr.Markdown("""
                ### AutoViz Analysis
                Automatic visualization of your dataset
                """)

            with gr.TabItem("Categorical Analysis"):
                encoder_dropdown = gr.Dropdown(
                    label="Select Encoder",
                    choices=['binary', 'onehot', 'catboost', 'count'],
                    value='binary',
                    interactive=True
                )
                plot_output = gr.Image(label="UMAP Visualization")

        def process_file(file):
            """Load the uploaded CSV and build the value for each output.

            Returns a 4-tuple ordered as (preview rows, Sweetviz HTML,
            AutoViz HTML, dropdown update). With no file, every output is
            cleared. On any failure both HTML panes receive the error text.
            """
            if file is None:
                return None, None, None, gr.Dropdown(choices=[])

            try:
                frame = pd.read_csv(file.name)
                head_rows = frame.head()
                sweetviz_markup = analyzer.generate_sweetviz_report(frame)
                autoviz_markup = analyzer.generate_autoviz_report(frame)
                categorical = frame.select_dtypes(include=['object', 'category'])
                dropdown_update = gr.Dropdown(choices=categorical.columns.tolist())
            except Exception as e:
                message = f"Error: {str(e)}"
                return None, message, message, gr.Dropdown(choices=[])

            return head_rows, sweetviz_markup, autoviz_markup, dropdown_update

        # Re-run the whole pipeline every time the uploaded file changes.
        upload_targets = [data_preview, report_html, autoviz_html, column_dropdown]
        file_input.change(
            fn=process_file,
            inputs=[file_input],
            outputs=upload_targets
        )

    return demo

# Script entry point: build the Blocks app and start serving it.
if __name__ == "__main__":
    demo = create_interface()
    # NOTE(review): show_error=True presumably surfaces handler exceptions in
    # the UI — confirm against the Gradio launch() documentation.
    demo.launch(show_error=True)