Create v1.txt
Browse files
v1.txt
ADDED
@@ -0,0 +1,246 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import pandas as pd
|
3 |
+
import aiohttp
|
4 |
+
import asyncio
|
5 |
+
import json
|
6 |
+
import io
|
7 |
+
import os
|
8 |
+
from typing import Optional, Tuple
|
9 |
+
|
10 |
+
class DataAnalyzer:
    """Summarize tabular data files and analyze them via the Chutes LLM API."""

    def __init__(self):
        # Chutes OpenAI-compatible chat-completions endpoint.
        self.api_base_url = "https://llm.chutes.ai/v1/chat/completions"

    async def analyze_with_chutes(self, api_token: str, data_summary: str,
                                  user_question: Optional[str] = None) -> str:
        """Send data to Chutes API for analysis.

        Args:
            api_token: Bearer token for the Chutes API.
            data_summary: Plain-text dataset summary (see generate_data_summary).
            user_question: Optional follow-up question; when falsy, a generic
                "analyze this dataset" prompt is used instead.

        Returns:
            The model's full streamed text, or a human-readable error string.
            This method never raises; all failures are returned as text so the
            UI can display them directly.
        """
        headers = {
            "Authorization": f"Bearer {api_token}",
            "Content-Type": "application/json"
        }

        # Create the prompt based on whether it's initial analysis or follow-up question
        if user_question:
            prompt = f"""Based on this dataset summary:
{data_summary}
User question: {user_question}
Please provide a detailed answer based on the data."""
        else:
            prompt = f"""Analyze the following dataset and provide comprehensive insights:
{data_summary}
Please provide:
1. Key statistical insights
2. Notable patterns or trends
3. Data quality observations
4. Business recommendations
5. Potential areas for further analysis
Keep the analysis clear, actionable, and data-driven."""

        body = {
            "model": "openai/gpt-oss-20b",
            "messages": [
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            "stream": True,
            "max_tokens": 2048,
            "temperature": 0.3  # Lower temperature for more consistent analysis
        }

        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(self.api_base_url, headers=headers, json=body) as response:
                    if response.status != 200:
                        return f"Error: API request failed with status {response.status}"

                    # Accumulate the server-sent-event stream ("data: {json}" lines).
                    full_response = ""
                    async for line in response.content:
                        line = line.decode("utf-8").strip()
                        if line.startswith("data: "):
                            data = line[6:]
                            if data == "[DONE]":
                                break
                            try:
                                chunk_data = json.loads(data)
                                if chunk_data.get("choices"):
                                    delta = chunk_data["choices"][0].get("delta", {})
                                    content = delta.get("content", "")
                                    if content:
                                        full_response += content
                            except json.JSONDecodeError:
                                # Skip keep-alive / partial lines that are not valid JSON.
                                continue

                    return full_response if full_response else "No response received from the model."

        except Exception as e:
            # Network / SSL / protocol failures are reported as text, not raised.
            return f"Error connecting to Chutes API: {str(e)}"

    def process_file(self, file_path: str) -> Tuple[pd.DataFrame, str]:
        """Load an uploaded CSV or Excel file.

        Args:
            file_path: Path to a .csv, .xlsx, or .xls file.

        Returns:
            Tuple of (dataframe, plain-text summary of the dataframe).

        Raises:
            Exception: wrapping any parse/IO error, with the original exception
                chained as the cause.
        """
        try:
            file_extension = os.path.splitext(file_path)[1].lower()

            if file_extension == '.csv':
                df = pd.read_csv(file_path)
            elif file_extension in ['.xlsx', '.xls']:
                df = pd.read_excel(file_path)
            else:
                raise ValueError("Unsupported file format. Please upload CSV or Excel files.")

            # Generate comprehensive data summary
            summary = self.generate_data_summary(df)
            return df, summary

        except Exception as e:
            # Chain the cause so the root error survives in tracebacks.
            raise Exception(f"Error processing file: {str(e)}") from e

    def generate_data_summary(self, df: pd.DataFrame) -> str:
        """Generate a comprehensive plain-text summary of *df*.

        Covers shape, per-column dtype/null stats, numeric describe() stats,
        categorical cardinality/mode, and a head() preview.
        """
        summary = []
        row_count = len(df)

        # Basic info
        summary.append("Dataset Overview:")
        summary.append(f"- Shape: {df.shape[0]} rows × {df.shape[1]} columns")
        summary.append(f"- Total cells: {df.shape[0] * df.shape[1]:,}")

        # Column information (dtype plus null count/percentage per column)
        summary.append("\nColumn Information:")
        for col, dtype in df.dtypes.items():
            null_count = df[col].isnull().sum()
            # Guard against division by zero for an empty dataframe.
            null_pct = (null_count / row_count) * 100 if row_count else 0.0
            summary.append(f"- {col} ({dtype}): {null_count} nulls ({null_pct:.1f}%)")

        # Numerical columns statistics
        numeric_cols = df.select_dtypes(include=['number']).columns
        if len(numeric_cols) > 0:
            summary.append("\nNumerical Columns Summary:")
            for col in numeric_cols:
                stats = df[col].describe()
                summary.append(f"- {col}: Mean={stats['mean']:.2f}, Std={stats['std']:.2f}, Range=[{stats['min']:.2f}, {stats['max']:.2f}]")

        # Categorical columns
        categorical_cols = df.select_dtypes(include=['object', 'category']).columns
        if len(categorical_cols) > 0:
            summary.append("\nCategorical Columns Summary:")
            for col in categorical_cols:
                unique_count = df[col].nunique()
                mode = df[col].mode()
                most_common = mode.iloc[0] if len(mode) > 0 else "N/A"
                summary.append(f"- {col}: {unique_count} unique values, Most common: '{most_common}'")

        # Sample data
        summary.append("\nFirst 5 rows preview:")
        summary.append(df.head().to_string())

        return "\n".join(summary)
|
136 |
+
|
137 |
+
# Initialize the analyzer: module-level singleton shared by every Gradio callback.
analyzer = DataAnalyzer()
|
139 |
+
|
140 |
+
async def analyze_data(file, api_key, user_question=""):
    """Validate inputs, summarize the uploaded file, and fetch AI insights.

    Returns a (markdown_report, raw_summary, html_preview) triple; when a
    required input is missing or processing fails, the first slot carries the
    message and the other two are empty strings.
    """
    # Guard clauses: both an uploaded file and an API key are required.
    if not file:
        return "Please upload a CSV or Excel file.", "", ""
    if not api_key:
        return "Please enter your Chutes API key.", "", ""

    try:
        # Parse the upload and build its textual summary.
        frame, summary_text = analyzer.process_file(file.name)

        # Ask the LLM for insights (or an answer to the user's question).
        insights = await analyzer.analyze_with_chutes(api_key, summary_text, user_question)

        # Assemble the markdown report shown in the UI.
        report = f"""## π Data Analysis Complete!
### π Dataset Overview:
{summary_text}
### π€ AI Insights & Recommendations:
{insights}
"""
        preview_html = frame.head(10).to_html()
        return report, summary_text, preview_html

    except Exception as e:
        return f"Error: {str(e)}", "", ""
|
167 |
+
|
168 |
+
def sync_analyze_data(file, api_key, user_question=""):
    """Synchronous bridge so Gradio's click handler can drive the async pipeline."""
    coroutine = analyze_data(file, api_key, user_question)
    return asyncio.run(coroutine)
|
171 |
+
|
172 |
+
# Create the Gradio interface.
# NOTE(review): the original indentation was lost in transit — the accordions are
# placed inside the results column here; confirm against the intended layout.
with gr.Blocks(title="π Smart Data Analyzer", theme=gr.themes.Ocean()) as app:
    gr.Markdown("""
    # π Smart Data Analyzer
    ### Upload your CSV/Excel file and get instant AI-powered insights using OpenAI's gpt-oss-20b model via Chutes!
    """)

    with gr.Row():
        # Left column: user inputs (file, credentials, optional question).
        with gr.Column(scale=1):
            # File upload — restricted to the formats process_file understands.
            file_input = gr.File(
                label="π Upload CSV or Excel File",
                file_types=[".csv", ".xlsx", ".xls"],
                file_count="single"
            )

            # API key input — type="password" masks the token in the UI.
            api_key_input = gr.Textbox(
                label="π Chutes API Key",
                placeholder="Enter your Chutes API token here...",
                type="password",
                lines=1
            )

            # Optional question input — empty string triggers the generic analysis prompt.
            question_input = gr.Textbox(
                label="β Ask a Specific Question (Optional)",
                placeholder="e.g., What are the sales trends? Which region performs best?",
                lines=2
            )

            # Analyze button
            analyze_btn = gr.Button("π Analyze Data", variant="primary", size="lg")

        # Right column: analysis results.
        with gr.Column(scale=2):
            # Results display — receives the markdown report from analyze_data.
            analysis_output = gr.Markdown(
                label="π Analysis Results",
                value="Upload a file and click 'Analyze Data' to see insights..."
            )

            # Additional outputs (hidden by default)
            with gr.Accordion("π Data Preview", open=False):
                data_preview = gr.HTML(label="First 10 Rows")

            with gr.Accordion("π Raw Data Summary", open=False):
                raw_summary = gr.Textbox(label="Dataset Summary", lines=10)

    # Event handlers — sync_analyze_data returns (report, summary, preview_html),
    # matching the three outputs in order.
    analyze_btn.click(
        fn=sync_analyze_data,
        inputs=[file_input, api_key_input, question_input],
        outputs=[analysis_output, raw_summary, data_preview]
    )

    # Example section
    gr.Markdown("""
    ### π‘ Tips for Best Results:
    - **File Size**: Keep files under 10MB for fastest processing
    - **API Key**: Get your free Chutes API key from [chutes.ai](https://chutes.ai)
    - **Questions**: Be specific! Ask about trends, patterns, outliers, or recommendations
    - **Formats**: Supports CSV, XLSX, and XLS files

    ### π― Example Questions to Ask:
    - "What are the key trends in this sales data?"
    - "Which products are underperforming?"
    - "Are there any seasonal patterns?"
    - "What recommendations do you have based on this data?"
    """)
|
241 |
+
|
242 |
+
# Launch the application
if __name__ == "__main__":
    app.launch(
        # NOTE(review): share=True publishes a temporary public Gradio URL —
        # confirm this exposure is intended for this deployment.
        share=True
    )
|