import json
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
import streamlit as st
from bs4 import BeautifulSoup
from scipy import stats  # FIX: Importing `stats` for z-score calculation

# `wordcloud` is an optional dependency. Default the name to None so the rest
# of the module can test for availability, then rebind it if the import works.
WordCloud = None
try:
    from wordcloud import WordCloud
except ImportError:
    # Package not installed: keep the None placeholder.
    pass

# Optional AI backend: build a Hugging Face text-generation pipeline if possible.
# Constructing the pipeline can fail for reasons other than a missing package
# (e.g. the model cannot be downloaded or loaded), so any failure here disables
# the AI feature (llm_pipeline = None) instead of crashing the app at import.
# NOTE(review): this runs on every execution of the script; consider caching
# the loaded pipeline if start-up time becomes a problem.
try:
    from transformers import pipeline
    llm_pipeline = pipeline("text-generation", model="facebook/opt-1.3b")  # Can use GPT-like models
except Exception:  # deliberate best-effort boundary for an optional feature
    llm_pipeline = None

# Global presentation settings: Streamlit page metadata, seaborn's grid theme,
# and no cap on the number of DataFrame columns pandas will display.
st.set_page_config(
    page_title="Conversational Data Analysis",
    page_icon="📊",
)
sns.set_style("whitegrid")
pd.set_option("display.max_columns", None)

### 📝 FUNCTION TO LOAD DATA ###
def load_data(uploaded_file):
    """Load an uploaded CSV, Excel, or JSON file into a pandas DataFrame.

    The parser is chosen from the extension of ``uploaded_file.name``, compared
    case-insensitively. Outcomes are reported through Streamlit status messages
    rather than raised exceptions.

    Args:
        uploaded_file: File-like object exposing a ``name`` attribute (for
            example the value returned by ``st.file_uploader``).

    Returns:
        The loaded DataFrame, or ``None`` when the format is unsupported or
        the file could not be parsed.
    """
    try:
        # Lower-case the extension so "DATA.CSV" is handled like "data.csv".
        file_extension = uploaded_file.name.rsplit(".", 1)[-1].lower()

        if file_extension == "csv":
            df = pd.read_csv(uploaded_file)
        elif file_extension == "xlsx":
            df = pd.read_excel(uploaded_file, engine="openpyxl")
        elif file_extension == "xls":
            # openpyxl cannot read the legacy .xls format, so let pandas pick
            # the appropriate engine for it.
            df = pd.read_excel(uploaded_file)
        elif file_extension == "json":
            df = pd.json_normalize(json.load(uploaded_file))
        else:
            st.error("❌ Unsupported file format. Use CSV, Excel, or JSON.")
            return None

        st.success(f"✅ Data loaded successfully: {df.shape[0]} rows, {df.shape[1]} columns")
        return df
    except Exception as e:
        # UI boundary: surface any parsing problem to the user instead of
        # letting it abort the Streamlit script.
        st.error(f"❌ Error loading file: {e}")
        return None

### 🔎 AUTOMATED DATA ANALYSIS ###
def analyze_data(df):
    """Build a plain-text report of basic findings about a DataFrame.

    The report may contain, in this order: a per-column count of missing
    values, ``describe()`` summary statistics, a correlation matrix of the
    numeric columns (only when there are at least two), and a count of numeric
    cells whose absolute column-wise z-score exceeds 3.

    Args:
        df: The DataFrame to analyse.

    Returns:
        The report sections joined by blank lines; an empty string when no
        section applies.
    """
    insights = []
    # Computed once and shared by the correlation and outlier sections.
    numeric_df = df.select_dtypes(include=[np.number])

    # 1. Missing Data Analysis
    missing_values = df.isnull().sum()
    missing_report = missing_values[missing_values > 0]
    if not missing_report.empty:
        insights.append(f"🔎 Missing Data Found:\n{missing_report.to_string()}")

    # 2. Summary Statistics (describe() raises on a frame without columns)
    if df.shape[1] > 0:
        insights.append(f"📊 Data Summary:\n{df.describe().to_string()}")

    # 3. Correlation Analysis
    # Correlate only the numeric columns: calling corr() on a frame that also
    # holds text columns raises on pandas >= 2.0.
    if numeric_df.shape[1] > 1:
        corr_matrix = numeric_df.corr().round(2)
        insights.append(f"📈 Correlation Matrix:\n{corr_matrix.to_string()}")

    # 4. Outlier Detection
    if not numeric_df.empty:
        # A plain float array keeps scipy independent of pandas dtypes.
        values = numeric_df.to_numpy(dtype=float, na_value=np.nan)
        # Constant columns have zero variance and produce NaN z-scores; NaN
        # never compares greater than 3, so those cells are simply not counted.
        with np.errstate(divide="ignore", invalid="ignore"):
            z_scores = np.abs(stats.zscore(values, nan_policy="omit"))
            outlier_count = int(np.count_nonzero(z_scores > 3))
        if outlier_count > 0:
            insights.append(f"⚠️ Outliers Detected: {outlier_count} extreme values found.")

    return "\n\n".join(insights)

### 📊 FUNCTION FOR CATEGORICAL DATA VISUALIZATION ###
def visualize_categorical_data(df, column, chart_type):
    """Render a bar chart, pie chart, or word cloud for a categorical column.

    All validation happens before a figure is created, and the figure is
    closed after rendering so repeated calls do not accumulate open
    matplotlib figures.

    Args:
        df: DataFrame holding the data.
        column: Name of the column to visualise.
        chart_type: One of "Bar Chart", "Pie Chart" or "Word Cloud".

    Returns:
        None. The chart, or an error/warning message, is written to the
        Streamlit page.
    """
    if column not in df.columns:
        st.error(f"⚠️ Column '{column}' not found!")
        return

    supported_charts = ("Bar Chart", "Pie Chart", "Word Cloud")
    if chart_type not in supported_charts:
        # Previously an unknown chart type silently rendered an empty figure.
        st.error(
            f"⚠️ Unsupported chart type '{chart_type}'. "
            f"Choose one of: {', '.join(supported_charts)}."
        )
        return

    if chart_type == "Word Cloud" and WordCloud is None:
        st.error("⚠️ `wordcloud` is not installed. Run `pip install wordcloud` to enable this feature.")
        return

    data = df[column].dropna()
    if data.empty:
        # Nothing to count or tokenise; plotting would raise.
        st.warning(f"⚠️ Column '{column}' has no non-missing values to plot.")
        return

    fig, ax = plt.subplots(figsize=(8, 5))
    try:
        if chart_type == "Bar Chart":
            data.value_counts().plot(kind="bar", color="purple", ax=ax)
            ax.set_title(f"Bar Chart of {column}")

        elif chart_type == "Pie Chart":
            data.value_counts().plot(kind="pie", autopct="%1.1f%%", startangle=90, cmap="coolwarm", ax=ax)
            ax.set_ylabel("")
            ax.set_title(f"Pie Chart of {column}")

        else:  # "Word Cloud"
            text = " ".join(data.astype(str))
            wordcloud = WordCloud(width=800, height=400, background_color="white").generate(text)
            ax.imshow(wordcloud, interpolation="bilinear")
            ax.axis("off")
            ax.set_title(f"Word Cloud for {column}")

        st.pyplot(fig)
    finally:
        # pyplot keeps a reference to every figure it creates until closed.
        plt.close(fig)

### 🤖 AI-POWERED INSIGHTS ###
def generate_ai_summary(df):
    """Ask the text-generation pipeline for a natural-language data summary.

    The prompt embeds the report produced by ``analyze_data(df)``.

    Args:
        df: The DataFrame to summarise.

    Returns:
        The generated text, or a warning message when no pipeline is loaded or
        generation fails.
    """
    if llm_pipeline is None:
        return "⚠️ AI insights unavailable. Install `transformers` and load an LLM model."

    prompt = f"""
    Analyze the following dataset summary:
    {analyze_data(df)}

    Provide insights on trends, anomalies, and patterns in natural language:
    """

    try:
        ai_response = llm_pipeline(
            prompt,
            # `max_length` counts the prompt tokens as well, so a prompt that
            # embeds a summary table leaves little or no room for generated
            # text; bound only the newly generated tokens instead.
            max_new_tokens=250,
            num_return_sequences=1,
            # Return only the model's continuation, not the echoed prompt.
            return_full_text=False,
        )
    except Exception as e:
        # UI boundary: e.g. a prompt longer than the model can accept.
        return f"⚠️ AI insights could not be generated: {e}"

    return ai_response[0]["generated_text"]

### 🏁 MAIN FUNCTION ###
def _visualize_numeric_data(df, column, chart_type):
    """Render a histogram, boxplot, or line chart for a numerical column."""
    data = df[column].dropna()
    if data.empty:
        st.warning(f"⚠️ Column '{column}' has no non-missing values to plot.")
        return

    values = data.to_numpy(dtype=float)
    fig, ax = plt.subplots(figsize=(8, 5))
    try:
        if chart_type == "Histogram":
            ax.hist(values, bins=30, color="purple", edgecolor="white")
            ax.set_xlabel(column)
            ax.set_ylabel("Frequency")
            ax.set_title(f"Histogram of {column}")
        elif chart_type == "Boxplot":
            ax.boxplot(values)
            ax.set_xticks([])
            ax.set_ylabel(column)
            ax.set_title(f"Boxplot of {column}")
        elif chart_type == "2D Line Chart":
            # Plot against the original index so dropped rows leave gaps in x.
            ax.plot(data.index, values, color="purple")
            ax.set_xlabel("Index")
            ax.set_ylabel(column)
            ax.set_title(f"Line Chart of {column}")
        else:
            st.error(f"⚠️ Unsupported chart type '{chart_type}'.")
            return
        st.pyplot(fig)
    finally:
        # pyplot keeps a reference to every figure it creates until closed.
        plt.close(fig)


def main():
    """Run the Streamlit page: upload a dataset, show analysis, draw charts.

    Flow: choose an action, upload a file, display the automated report and
    the AI summary, then let the user chart a numerical or categorical column.
    """
    st.title("📊 Conversational Data Analysis Chatbot")
    st.write("Upload a dataset and chat about trends, patterns, and anomalies!")

    # Step 1: Ask the user what they want to do
    action = st.radio("What would you like to do?", ["Analyze a Spreadsheet", "Scrape a Website"])

    if action != "Analyze a Spreadsheet":
        # No scraping workflow exists in this script; say so rather than
        # leaving the page blank.
        st.info("🌐 Website scraping is not available yet.")
        return

    uploaded_file = st.file_uploader("📂 Upload a CSV, Excel, or JSON file", type=["csv", "xlsx", "json"])
    if not uploaded_file:
        return

    df = load_data(uploaded_file)
    if df is None:
        return

    # Generate full analysis and display insights
    st.subheader("📊 Automated Data Analysis")
    st.text(analyze_data(df))

    # AI-Powered Insights
    st.subheader("🤖 AI Summary of Data Trends")
    st.text(generate_ai_summary(df))

    # Let user interact with data
    data_type = st.radio("What would you like to explore?", ["Numerical Trends", "Categorical Insights"])

    if data_type == "Numerical Trends":
        numeric_columns = df.select_dtypes(include=[np.number]).columns.tolist()
        if numeric_columns:
            column = st.selectbox("🔢 Select a numerical column:", numeric_columns)
            chart_type = st.selectbox("📊 Choose a chart type:", ["Histogram", "Boxplot", "2D Line Chart"])
            if st.button("Generate Chart"):
                # Numerical chart types need their own renderer; the
                # categorical one draws none of them.
                _visualize_numeric_data(df, column, chart_type)
        else:
            st.warning("⚠️ No numerical columns found in the dataset.")

    elif data_type == "Categorical Insights":
        categorical_columns = df.select_dtypes(include=["object"]).columns.tolist()
        if categorical_columns:
            column = st.selectbox("🔠 Select a categorical column:", categorical_columns)
            chart_type = st.radio("📊 Choose a visualization:", ["Bar Chart", "Pie Chart", "Word Cloud"])
            if st.button("Generate Chart"):
                visualize_categorical_data(df, column, chart_type)
        else:
            st.warning("⚠️ No categorical columns found in the dataset.")


if __name__ == "__main__":
    main()