Spaces:

baconnier
/

csv-plus-plus

Running

App Files Files Community

baconnier committed on Oct 26, 2024

Commit

7617875

verified ·

1 Parent(s): b280299

Update app.py

Browse files

Files changed (1) hide show

app.py +82 -142

app.py CHANGED Viewed

@@ -9,6 +9,8 @@ import matplotlib.pyplot as plt
 from sklearn.preprocessing import StandardScaler
 from autoviz.AutoViz_Class import AutoViz_Class
 import shutil
 class DataAnalyzer:
     def __init__(self):
@@ -16,40 +18,15 @@ class DataAnalyzer:
         self.df = None
         self.AV = AutoViz_Class()
-    def generate_sweetviz_report(self, df):
-        self.df = df
-        report = sv.analyze(df)
-        report_path = os.path.join(self.temp_dir, "report.html")
-        report.show_html(report_path, open_browser=False)
-        with open(report_path, 'r', encoding='utf-8') as f:
-            html_content = f.read()
-        html_with_table = f"""
-        <table width="100%" style="border-collapse: collapse;">
-            <tr>
-                <td style="padding: 20px; border: 1px solid #ddd;">
-                    <div style="height: 800px; overflow: auto;">
-                        {html_content}
-                    </div>
-                </td>
-            </tr>
-        </table>
-        """
-        os.remove(report_path)
-        return html_with_table
     def generate_autoviz_report(self, df):
-        """Generate AutoViz report and return the HTML content"""
-        # Create a temporary directory for AutoViz output
-        viz_temp_dir = os.path.join(self.temp_dir, "autoviz")
         if os.path.exists(viz_temp_dir):
             shutil.rmtree(viz_temp_dir)
         os.makedirs(viz_temp_dir)
         try:
-            # Generate AutoViz report
             dft = self.AV.AutoViz(
                 filename='',
                 sep=',',
@@ -59,87 +36,64 @@ class DataAnalyzer:
                 verbose=0,
                 lowess=False,
                 chart_format='html',
-                max_rows_analyzed=150000,
-                save_plot_dir=viz_temp_dir
             )
-            # Combine all HTML files into one
-            html_content = ""
-            for file in sorted(os.listdir(viz_temp_dir)):
-                if file.endswith('.html'):
-                    with open(os.path.join(viz_temp_dir, file), 'r', encoding='utf-8') as f:
-                        html_content += f.read() + "<br><hr><br>"
-            # Wrap the content in a scrollable div
-            html_with_table = f"""
-            <table width="100%" style="border-collapse: collapse;">
-                <tr>
-                    <td style="padding: 20px; border: 1px solid #ddd;">
-                        <div style="height: 800px; overflow: auto;">
-                            {html_content}
-                        </div>
-                    </td>
-                </tr>
-            </table>
             """
-            return html_with_table
         except Exception as e:
-            return f"Error generating AutoViz report: {str(e)}"
         finally:
-            # Clean up
             if os.path.exists(viz_temp_dir):
                 shutil.rmtree(viz_temp_dir)
-    def encode_and_visualize(self, column_name, encoder_type='binary'):
-        if self.df is None or column_name not in self.df.columns:
-            return None
-        df_subset = self.df[[column_name]].copy()
-        encoders = {
-            'binary': ce.BinaryEncoder(),
-            'onehot': ce.OneHotEncoder(),
-            'catboost': ce.CatBoostEncoder(),
-            'count': ce.CountEncoder()
-        }
-        encoder = encoders.get(encoder_type)
-        encoded_df = encoder.fit_transform(df_subset)
-        scaler = StandardScaler()
-        scaled_data = scaler.fit_transform(encoded_df)
-        reducer = umap.UMAP(
-            n_neighbors=15,
-            min_dist=0.1,
-            n_components=2,
-            random_state=42
-        )
-        embedding = reducer.fit_transform(scaled_data)
-        plt.figure(figsize=(10, 6))
-        scatter = plt.scatter(
-            embedding[:, 0],
-            embedding[:, 1],
-            c=pd.factorize(df_subset[column_name])[0],
-            cmap='viridis',
-            alpha=0.6
-        )
-        plt.colorbar(scatter)
-        plt.title(f'UMAP visualization of {column_name}\nusing {encoder_type} encoding')
-        plt.xlabel('UMAP1')
-        plt.ylabel('UMAP2')
-        buf = io.BytesIO()
-        plt.savefig(buf, format='png', bbox_inches='tight')
-        plt.close()
-        buf.seek(0)
-        return buf
 def create_interface():
     analyzer = DataAnalyzer()
@@ -148,74 +102,60 @@ def create_interface():
         gr.Markdown("# Data Analysis Dashboard")
         with gr.Tabs():
-            with gr.TabItem("Sweetviz Analysis"):
                 file_input = gr.File(label="Upload CSV")
-                report_html = gr.HTML()
             with gr.TabItem("AutoViz Analysis"):
-                autoviz_html = gr.HTML()
-            with gr.TabItem("Categorical Analysis"):
                 with gr.Row():
-                    column_dropdown = gr.Dropdown(
-                        label="Select Categorical Column",
-                        choices=[],
-                        interactive=True
-                    )
-                    encoder_dropdown = gr.Dropdown(
-                        label="Select Encoder",
-                        choices=['binary', 'onehot', 'catboost', 'count'],
-                        value='binary',
-                        interactive=True
-                    )
-                plot_output = gr.Image(label="UMAP Visualization")
         def process_file(file):
             if file is None:
-                return None, None, gr.Dropdown(choices=[])
             try:
                 df = pd.read_csv(file.name)
-                cat_columns = df.select_dtypes(include=['object', 'category']).columns.tolist()
-                # Generate both reports
                 sweetviz_report = analyzer.generate_sweetviz_report(df)
                 autoviz_report = analyzer.generate_autoviz_report(df)
                 return (
                     sweetviz_report,
                     autoviz_report,
                     gr.Dropdown(choices=cat_columns)
                 )
             except Exception as e:
-                return f"Error: {str(e)}", None, gr.Dropdown(choices=[])
-        def update_plot(column, encoder_type):
-            if column is None:
-                return None
-            try:
-                return analyzer.encode_and_visualize(column, encoder_type)
-            except Exception as e:
-                return None
         file_input.change(
             fn=process_file,
             inputs=[file_input],
-            outputs=[report_html, autoviz_html, column_dropdown]
         )
-        column_dropdown.change(
-            fn=update_plot,
-            inputs=[column_dropdown, encoder_dropdown],
-            outputs=[plot_output]
-        )
-        encoder_dropdown.change(
-            fn=update_plot,
-            inputs=[column_dropdown, encoder_dropdown],
-            outputs=[plot_output]
-        )
     return demo
 if __name__ == "__main__":

 from sklearn.preprocessing import StandardScaler
 from autoviz.AutoViz_Class import AutoViz_Class
 import shutil
+import warnings
+warnings.filterwarnings('ignore')
 class DataAnalyzer:
     def __init__(self):
         self.df = None
         self.AV = AutoViz_Class()
     def generate_autoviz_report(self, df):
+        """Generate AutoViz report with proper error handling"""
+        viz_temp_dir = os.path.join(self.temp_dir, "autoviz_output")
         if os.path.exists(viz_temp_dir):
             shutil.rmtree(viz_temp_dir)
         os.makedirs(viz_temp_dir)
         try:
+            # Configure AutoViz with safe defaults
             dft = self.AV.AutoViz(
                 filename='',
                 sep=',',
                 verbose=0,
                 lowess=False,
                 chart_format='html',
+                max_rows_analyzed=5000,  # Limit rows for better performance
+                max_cols_analyzed=30,    # Limit columns
+                save_plot_dir=viz_temp_dir,
+                ignore_warnings=True
             )
+            # Collect all generated HTML files
+            html_parts = []
+            if os.path.exists(viz_temp_dir):
+                for file in sorted(os.listdir(viz_temp_dir)):
+                    if file.endswith('.html'):
+                        file_path = os.path.join(viz_temp_dir, file)
+                        try:
+                            with open(file_path, 'r', encoding='utf-8') as f:
+                                content = f.read()
+                                if content.strip():  # Only add non-empty content
+                                    html_parts.append(content)
+                        except Exception as e:
+                            print(f"Error reading file {file}: {str(e)}")
+            if not html_parts:
+                return "No visualizations were generated. The dataset might be too small or contain invalid data."
+            # Combine all HTML content
+            combined_html = "<br><hr><br>".join(html_parts)
+            # Create a container with proper styling
+            html_with_container = f"""
+            <div style="width: 100%; max-width: 1200px; margin: 0 auto;">
+                <div style="height: 800px; overflow-y: auto; padding: 20px; border: 1px solid #ddd; border-radius: 5px;">
+                    <h2 style="text-align: center; margin-bottom: 20px;">AutoViz Analysis Report</h2>
+                    {combined_html}
+                </div>
+            </div>
             """
+            return html_with_container
         except Exception as e:
+            error_message = f"""
+            <div style="color: red; padding: 20px; border: 1px solid red; border-radius: 5px; margin: 20px;">
+                <h3>Error Generating AutoViz Report</h3>
+                <p>Error details: {str(e)}</p>
+                <p>Suggestions:</p>
+                <ul>
+                    <li>Check if your dataset has valid numerical or categorical columns</li>
+                    <li>Ensure your dataset has at least 2 columns and 10 rows</li>
+                    <li>Remove any corrupted or invalid data</li>
+                </ul>
+            </div>
+            """
+            return error_message
         finally:
+            # Cleanup
             if os.path.exists(viz_temp_dir):
                 shutil.rmtree(viz_temp_dir)
+    # ... (rest of the DataAnalyzer class remains the same)
 def create_interface():
     analyzer = DataAnalyzer()
         gr.Markdown("# Data Analysis Dashboard")
         with gr.Tabs():
+            with gr.TabItem("Data Upload & Preview"):
                 file_input = gr.File(label="Upload CSV")
+                data_preview = gr.Dataframe(label="Data Preview")
             with gr.TabItem("AutoViz Analysis"):
                 with gr.Row():
+                    autoviz_html = gr.HTML()
+                    gr.Markdown("""
+                    ### AutoViz Analysis Info
+                    - Generates automatic visualizations
+                    - Analyzes relationships between variables
+                    - Creates distribution plots
+                    - Shows correlation matrices
+                    - Identifies patterns and outliers
+                    """)
+            # ... (other tabs remain the same)
         def process_file(file):
             if file is None:
+                return None, None, None, gr.Dropdown(choices=[])
             try:
                 df = pd.read_csv(file.name)
+                # Preview first few rows
+                preview = df.head()
+                # Generate reports
                 sweetviz_report = analyzer.generate_sweetviz_report(df)
                 autoviz_report = analyzer.generate_autoviz_report(df)
+                # Get categorical columns
+                cat_columns = df.select_dtypes(include=['object', 'category']).columns.tolist()
                 return (
+                    preview,
                     sweetviz_report,
                     autoviz_report,
                     gr.Dropdown(choices=cat_columns)
                 )
             except Exception as e:
+                error_message = f"Error processing file: {str(e)}"
+                return None, error_message, error_message, gr.Dropdown(choices=[])
+        # Update file input handler
         file_input.change(
             fn=process_file,
             inputs=[file_input],
+            outputs=[data_preview, report_html, autoviz_html, column_dropdown]
         )
+        # ... (rest of the interface remains the same)
     return demo
 if __name__ == "__main__":