Spaces:

baconnier
/

csv-plus-plus

Running

App Files Files Community

baconnier committed on Oct 26, 2024

Commit

179691f

verified ·

1 Parent(s): cb5dc7e

Update app.py

Browse files

Files changed (1) hide show

app.py +65 -12

app.py CHANGED Viewed

@@ -43,7 +43,7 @@ class DataAnalyzer:
         os.remove(report_path)
         return html_with_table
     def generate_autoviz_report(self, df):
         if df is None:
             return "Please upload a dataset first"
@@ -52,23 +52,43 @@ class DataAnalyzer:
         if os.path.exists(viz_temp_dir):
             shutil.rmtree(viz_temp_dir)
         os.makedirs(viz_temp_dir)
         try:
-            plt.close('all')
             dfte = self.AV.AutoViz(
                 filename='',
                 sep=',',
                 depVar='',
                 dfte=df,
                 header=0,
-                verbose=0,
                 lowess=False,
                 chart_format='html',
                 max_rows_analyzed=5000,
                 max_cols_analyzed=30,
-                save_plot_dir=viz_temp_dir
             )
             html_parts = []
             if os.path.exists(viz_temp_dir):
                 for file in sorted(os.listdir(viz_temp_dir)):
@@ -81,15 +101,48 @@ class DataAnalyzer:
                                     html_parts.append(content)
                         except Exception as e:
                             print(f"Error reading file {file}: {str(e)}")
             if not html_parts:
-                return "No visualizations were generated. Please check your data."
-            combined_html = "<br><hr><br>".join(html_parts)
             return combined_html
         except Exception as e:
-            return f"Error in AutoViz: {str(e)}"
         finally:
             if os.path.exists(viz_temp_dir):
                 shutil.rmtree(viz_temp_dir)

         os.remove(report_path)
         return html_with_table
     def generate_autoviz_report(self, df):
         if df is None:
             return "Please upload a dataset first"
         if os.path.exists(viz_temp_dir):
             shutil.rmtree(viz_temp_dir)
         os.makedirs(viz_temp_dir)
         try:
+            # Sample data if it's too large
+            if len(df) > 5000:
+                df = df.sample(n=5000, random_state=42)
+            # Ensure all columns are properly formatted
+            df = df.copy()
+            # Convert numeric columns that might be stored as strings
+            for col in df.columns:
+                if df[col].dtype == 'object':
+                    try:
+                        df[col] = pd.to_numeric(df[col], errors='ignore')
+                    except:
+                        pass
+            plt.close('all')  # Close any existing plots
+            # Configure AutoViz with more specific parameters
             dfte = self.AV.AutoViz(
                 filename='',
                 sep=',',
                 depVar='',
                 dfte=df,
                 header=0,
+                verbose=1,  # Set to 1 to see progress
                 lowess=False,
                 chart_format='html',
                 max_rows_analyzed=5000,
                 max_cols_analyzed=30,
+                save_plot_dir=viz_temp_dir,
+                ignore_warnings=True,
+                sampling=True,  # Enable sampling
+                sample_size=5000
             )
+            # Collect and combine HTML files
             html_parts = []
             if os.path.exists(viz_temp_dir):
                 for file in sorted(os.listdir(viz_temp_dir)):
                                     html_parts.append(content)
                         except Exception as e:
                             print(f"Error reading file {file}: {str(e)}")
             if not html_parts:
+                return """
+                <div style="padding: 20px; border: 1px solid #ddd; border-radius: 5px;">
+                    <h3>No visualizations were generated</h3>
+                    <p>This might be due to:</p>
+                    <ul>
+                        <li>Data format issues</li>
+                        <li>Too few unique values in columns</li>
+                        <li>All categorical data with high cardinality</li>
+                    </ul>
+                    <p>Try with a different dataset or check your data formatting.</p>
+                </div>
+                """
+            # Combine all HTML content with proper styling
+            combined_html = f"""
+            <div style="padding: 20px; border: 1px solid #ddd; border-radius: 5px;">
+                <h2 style="text-align: center;">AutoViz Analysis Report</h2>
+                <p style="text-align: center;">Analysis of {len(df)} rows and {len(df.columns)} columns</p>
+                <hr>
+                {'<hr>'.join(html_parts)}
+            </div>
+            """
             return combined_html
         except Exception as e:
+            error_message = f"""
+            <div style="padding: 20px; border: 1px solid red; border-radius: 5px;">
+                <h3>Error in AutoViz Analysis</h3>
+                <p>Error details: {str(e)}</p>
+                <p>Troubleshooting steps:</p>
+                <ul>
+                    <li>Check if your data contains valid numerical or categorical values</li>
+                    <li>Ensure there are no completely empty columns</li>
+                    <li>Try with a smaller dataset</li>
+                    <li>Check for any special characters in column names</li>
+                </ul>
+            </div>
+            """
+            return error_message
         finally:
             if os.path.exists(viz_temp_dir):
                 shutil.rmtree(viz_temp_dir)