Spaces:

baconnier
/

csv-plus-plus

Running

App Files Community

baconnier committed on Oct 26, 2024

Commit

8d8e69e

verified ·

1 Parent(s): 7617875

Update app.py

Browse files

Files changed (1) hide show

app.py +60 -63

app.py CHANGED Viewed

@@ -18,31 +18,55 @@ class DataAnalyzer:
         self.df = None
         self.AV = AutoViz_Class()
     def generate_autoviz_report(self, df):
-        """Generate AutoViz report with proper error handling"""
         viz_temp_dir = os.path.join(self.temp_dir, "autoviz_output")
         if os.path.exists(viz_temp_dir):
             shutil.rmtree(viz_temp_dir)
         os.makedirs(viz_temp_dir)
         try:
-            # Configure AutoViz with safe defaults
-            dft = self.AV.AutoViz(
                 filename='',
                 sep=',',
                 depVar='',
-                dfte=df,
                 header=0,
                 verbose=0,
                 lowess=False,
                 chart_format='html',
-                max_rows_analyzed=5000,  # Limit rows for better performance
-                max_cols_analyzed=30,    # Limit columns
-                save_plot_dir=viz_temp_dir,
-                ignore_warnings=True
             )
-            # Collect all generated HTML files
             html_parts = []
             if os.path.exists(viz_temp_dir):
                 for file in sorted(os.listdir(viz_temp_dir)):
@@ -51,74 +75,57 @@ class DataAnalyzer:
                         try:
                             with open(file_path, 'r', encoding='utf-8') as f:
                                 content = f.read()
-                                if content.strip():  # Only add non-empty content
                                     html_parts.append(content)
                         except Exception as e:
                             print(f"Error reading file {file}: {str(e)}")
             if not html_parts:
-                return "No visualizations were generated. The dataset might be too small or contain invalid data."
-            # Combine all HTML content
             combined_html = "<br><hr><br>".join(html_parts)
-            # Create a container with proper styling
-            html_with_container = f"""
-            <div style="width: 100%; max-width: 1200px; margin: 0 auto;">
-                <div style="height: 800px; overflow-y: auto; padding: 20px; border: 1px solid #ddd; border-radius: 5px;">
-                    <h2 style="text-align: center; margin-bottom: 20px;">AutoViz Analysis Report</h2>
-                    {combined_html}
-                </div>
-            </div>
-            """
-            return html_with_container
         except Exception as e:
-            error_message = f"""
-            <div style="color: red; padding: 20px; border: 1px solid red; border-radius: 5px; margin: 20px;">
-                <h3>Error Generating AutoViz Report</h3>
-                <p>Error details: {str(e)}</p>
-                <p>Suggestions:</p>
-                <ul>
-                    <li>Check if your dataset has valid numerical or categorical columns</li>
-                    <li>Ensure your dataset has at least 2 columns and 10 rows</li>
-                    <li>Remove any corrupted or invalid data</li>
-                </ul>
-            </div>
-            """
-            return error_message
         finally:
-            # Cleanup
             if os.path.exists(viz_temp_dir):
                 shutil.rmtree(viz_temp_dir)
-    # ... (rest of the DataAnalyzer class remains the same)
 def create_interface():
     analyzer = DataAnalyzer()
     with gr.Blocks(theme=gr.themes.Soft()) as demo:
         gr.Markdown("# Data Analysis Dashboard")
         with gr.Tabs():
             with gr.TabItem("Data Upload & Preview"):
                 file_input = gr.File(label="Upload CSV")
-                data_preview = gr.Dataframe(label="Data Preview")
             with gr.TabItem("AutoViz Analysis"):
-                with gr.Row():
-                    autoviz_html = gr.HTML()
-                    gr.Markdown("""
-                    ### AutoViz Analysis Info
-                    - Generates automatic visualizations
-                    - Analyzes relationships between variables
-                    - Creates distribution plots
-                    - Shows correlation matrices
-                    - Identifies patterns and outliers
-                    """)
-            # ... (other tabs remain the same)
         def process_file(file):
             if file is None:
@@ -126,15 +133,9 @@ def create_interface():
             try:
                 df = pd.read_csv(file.name)
-                # Preview first few rows
                 preview = df.head()
-                # Generate reports
                 sweetviz_report = analyzer.generate_sweetviz_report(df)
                 autoviz_report = analyzer.generate_autoviz_report(df)
-                # Get categorical columns
                 cat_columns = df.select_dtypes(include=['object', 'category']).columns.tolist()
                 return (
@@ -144,17 +145,13 @@ def create_interface():
                     gr.Dropdown(choices=cat_columns)
                 )
             except Exception as e:
-                error_message = f"Error processing file: {str(e)}"
-                return None, error_message, error_message, gr.Dropdown(choices=[])
-        # Update file input handler
         file_input.change(
             fn=process_file,
             inputs=[file_input],
             outputs=[data_preview, report_html, autoviz_html, column_dropdown]
         )
-        # ... (rest of the interface remains the same)
     return demo

         self.df = None
         self.AV = AutoViz_Class()
+    def generate_sweetviz_report(self, df):
+        self.df = df
+        report = sv.analyze(df)
+        report_path = os.path.join(self.temp_dir, "report.html")
+        report.show_html(report_path, open_browser=False)
+        with open(report_path, 'r', encoding='utf-8') as f:
+            html_content = f.read()
+        html_with_table = f"""
+        <table width="100%" style="border-collapse: collapse;">
+            <tr>
+                <td style="padding: 20px; border: 1px solid #ddd;">
+                    <div style="height: 800px; overflow: auto;">
+                        {html_content}
+                    </div>
+                </td>
+            </tr>
+        </table>
+        """
+        os.remove(report_path)
+        return html_with_table
     def generate_autoviz_report(self, df):
+        """Generate AutoViz report with proper sequence"""
         viz_temp_dir = os.path.join(self.temp_dir, "autoviz_output")
         if os.path.exists(viz_temp_dir):
             shutil.rmtree(viz_temp_dir)
         os.makedirs(viz_temp_dir)
         try:
+            # Proper AutoViz sequence
+            plt.close('all')  # Close any existing plots
+            dfte = self.AV.AutoViz(
                 filename='',
                 sep=',',
                 depVar='',
+                dfte=df,  # Pass DataFrame directly
                 header=0,
                 verbose=0,
                 lowess=False,
                 chart_format='html',
+                max_rows_analyzed=5000,
+                max_cols_analyzed=30,
+                save_plot_dir=viz_temp_dir
             )
+            # Collect generated HTML files
             html_parts = []
             if os.path.exists(viz_temp_dir):
                 for file in sorted(os.listdir(viz_temp_dir)):
                         try:
                             with open(file_path, 'r', encoding='utf-8') as f:
                                 content = f.read()
+                                if content.strip():
                                     html_parts.append(content)
                         except Exception as e:
                             print(f"Error reading file {file}: {str(e)}")
             if not html_parts:
+                return "No visualizations were generated. Please check your data."
             combined_html = "<br><hr><br>".join(html_parts)
+            return combined_html
         except Exception as e:
+            return f"Error in AutoViz: {str(e)}"
         finally:
             if os.path.exists(viz_temp_dir):
                 shutil.rmtree(viz_temp_dir)
 def create_interface():
     analyzer = DataAnalyzer()
     with gr.Blocks(theme=gr.themes.Soft()) as demo:
         gr.Markdown("# Data Analysis Dashboard")
+        # Define all outputs first
+        data_preview = gr.Dataframe(label="Data Preview")
+        report_html = gr.HTML(label="Sweetviz Report")
+        autoviz_html = gr.HTML(label="AutoViz Report")
+        column_dropdown = gr.Dropdown(
+            label="Select Categorical Column",
+            choices=[],
+            interactive=True
+        )
         with gr.Tabs():
             with gr.TabItem("Data Upload & Preview"):
                 file_input = gr.File(label="Upload CSV")
             with gr.TabItem("AutoViz Analysis"):
+                gr.Markdown("""
+                ### AutoViz Analysis
+                Automatic visualization of your dataset
+                """)
+            with gr.TabItem("Categorical Analysis"):
+                encoder_dropdown = gr.Dropdown(
+                    label="Select Encoder",
+                    choices=['binary', 'onehot', 'catboost', 'count'],
+                    value='binary',
+                    interactive=True
+                )
+                plot_output = gr.Image(label="UMAP Visualization")
         def process_file(file):
             if file is None:
             try:
                 df = pd.read_csv(file.name)
                 preview = df.head()
                 sweetviz_report = analyzer.generate_sweetviz_report(df)
                 autoviz_report = analyzer.generate_autoviz_report(df)
                 cat_columns = df.select_dtypes(include=['object', 'category']).columns.tolist()
                 return (
                     gr.Dropdown(choices=cat_columns)
                 )
             except Exception as e:
+                return None, f"Error: {str(e)}", f"Error: {str(e)}", gr.Dropdown(choices=[])
         file_input.change(
             fn=process_file,
             inputs=[file_input],
             outputs=[data_preview, report_html, autoviz_html, column_dropdown]
         )
     return demo