# Hugging Face Space: bhagwandas/FGPT (status: Sleeping)
# File size: 5,210 bytes

import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch

# Page-level settings: page title and wide layout.
st.set_page_config(
    layout="wide",
    page_title="Smart Factory RAG Assistant",
)

# Main heading rendered at the top of the app.
st.title("🏠 Industry 5.0 | Smart Factory RAG Assistant (Open Source)")

# Load the open-source model (Mistral-7B-Instruct)
@st.cache_resource(show_spinner=True)
def load_model():
    """Build the text-generation pipeline for Mistral-7B-Instruct v0.2.

    The tokenizer and the causal-LM weights are loaded from the same
    checkpoint. Weights are requested in float16 with ``device_map="auto"``.
    The function is wrapped in ``st.cache_resource`` with the spinner enabled.

    Returns:
        The ``transformers`` ``text-generation`` pipeline wrapping the loaded
        model and tokenizer.
    """
    checkpoint = "mistralai/Mistral-7B-Instruct-v0.2"
    weight_options = {"torch_dtype": torch.float16, "device_map": "auto"}

    tok = AutoTokenizer.from_pretrained(checkpoint)
    causal_lm = AutoModelForCausalLM.from_pretrained(checkpoint, **weight_options)
    return pipeline("text-generation", model=causal_lm, tokenizer=tok)

# Text-generation pipeline used by the assistant section below.
nlp = load_model()


def _show_figure(fig):
    """Render a Matplotlib figure in the app, then close it.

    Figures created through ``plt.subplots`` stay registered with pyplot
    until they are closed explicitly, so every figure is closed right after
    it has been drawn to keep them from piling up across script runs.
    """
    st.pyplot(fig)
    plt.close(fig)


# File Upload
uploaded_file = st.file_uploader("📄 Upload your factory CSV data", type=["csv"])

if uploaded_file:
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        # An unreadable upload is a user error, not an app crash.
        st.error(f"Could not read the uploaded CSV: {exc}")
        st.stop()
    st.success("✅ File uploaded and loaded!")

    # Custom column selection for preview
    st.subheader("📃 Data Preview")
    all_columns = df.columns.tolist()
    selected_columns = st.multiselect("Select columns to preview", all_columns, default=all_columns[:5])
    st.dataframe(df[selected_columns].head())

    # Descriptive Stats
    # Computed once, before any derived column is added, and reused for the
    # assistant prompt so the prompt describes only the uploaded data.
    st.subheader("📊 Descriptive Statistics")
    stats = df.describe()
    st.dataframe(stats.T)

    # Everything below (heatmap, plots, anomaly detection) needs at least one
    # numeric column; stop with a message instead of failing inside a plot call.
    numeric_columns = df.select_dtypes(include='number').columns.tolist()
    if not numeric_columns:
        st.warning("No numeric columns were found in the uploaded file; the remaining analyses need numeric data.")
        st.stop()

    # Correlation Analysis
    st.subheader("🔗 Parameter Correlation Heatmap")
    fig, ax = plt.subplots(figsize=(10, 6))
    corr = df.corr(numeric_only=True)
    sns.heatmap(corr, annot=True, cmap="coolwarm", fmt=".2f", ax=ax)
    _show_figure(fig)

    # Technical Visualizations
    st.subheader("📊 Technical Graphs")

    # Time Series Plot
    selected_graph_column = st.selectbox("Select a parameter for time series plot", numeric_columns)
    time_column = st.selectbox("Select time/index column (optional)", ['Index'] + all_columns, index=0)

    fig2, ax2 = plt.subplots(figsize=(10, 4))
    timestamps = None
    if time_column != 'Index':
        try:
            # Parse into a separate Series: overwriting df[time_column] would
            # change the column's dtype for every later analysis step.
            timestamps = pd.to_datetime(df[time_column])
        except (ValueError, TypeError, OverflowError):
            st.warning(f"Could not parse '{time_column}' as dates; plotting against the row index instead.")

    if timestamps is not None:
        trend = pd.DataFrame(
            {"time": timestamps, "value": df[selected_graph_column]}
        ).sort_values(by="time")
        ax2.plot(trend["time"], trend["value"])
        ax2.set_xlabel(time_column)
    else:
        ax2.plot(df[selected_graph_column])
        ax2.set_xlabel("Index")
    ax2.set_title(f"Trend Over Time: {selected_graph_column}")
    ax2.set_ylabel(selected_graph_column)
    _show_figure(fig2)

    # Pairplot
    if len(numeric_columns) > 1:
        st.subheader("🔄 Pairwise Parameter Relationships")
        # Cap the sample at 200 rows; fixed seed keeps the plot stable.
        sampled_df = df[numeric_columns].sample(n=200, random_state=1) if len(df) > 200 else df[numeric_columns]
        pair_fig = sns.pairplot(sampled_df)
        st.pyplot(pair_fig)
        # pairplot returns a seaborn grid; close the figure it wraps.
        plt.close(pair_fig.figure)

    # Boxplots
    st.subheader("📈 Distribution & Outliers per Parameter")
    selected_box_column = st.selectbox("Select parameter for boxplot", numeric_columns)
    fig3, ax3 = plt.subplots()
    sns.boxplot(y=df[selected_box_column], ax=ax3)
    ax3.set_title(f"Boxplot: {selected_box_column}")
    _show_figure(fig3)

    # Anomaly Detection
    st.subheader("⚠️ Anomaly Detection using Isolation Forest")
    # Only rows with no missing numeric value can be scored.
    num_df = df[numeric_columns].dropna()
    if num_df.empty:
        st.warning("Every row has at least one missing numeric value; anomaly detection was skipped.")
        anomalies = df.iloc[0:0]
    else:
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(num_df)

        # Fixed seed: the script reruns on every widget interaction, and the
        # reported anomalies should not change between those reruns.
        iso = IsolationForest(contamination=0.05, random_state=42)
        # Key the labels by num_df's index so they line up with the right rows
        # of df; rows dropped above are left as NaN in the 'Anomaly' column.
        df['Anomaly'] = pd.Series(iso.fit_predict(X_scaled), index=num_df.index)
        anomalies = df[df['Anomaly'] == -1]
        st.write(f"Detected {len(anomalies)} anomalies")
        st.dataframe(anomalies.head(10))

    # Role-based Assistant
    st.subheader("🧠 Role-Based Decision Assistant")
    role = st.selectbox("Select your role", ["Engineer", "Operator"])
    question = st.text_input("Ask a question based on the data analysis")

    if question:
        with st.spinner("Generating insights..."):
            summary = stats.to_string()
            corr_text = corr.to_string()
            anomaly_count = len(anomalies)

            context = f"""
You are a highly skilled {role} working in a smart manufacturing facility.

Here is a summary of the uploaded data:

STATISTICAL SUMMARY:
{summary}

PARAMETER CORRELATIONS:
{corr_text}

ANOMALY DETECTION:
{anomaly_count} anomalies detected using Isolation Forest method.

Based on this context, answer the following question in a clear, technically accurate manner and suggest best decisions from the point of view of a {role}.

QUESTION: {question}
ANSWER:
"""
            prompt = f"<s>[INST] {context} [/INST]"
            output = nlp(prompt, max_new_tokens=512, do_sample=True, temperature=0.5)[0]['generated_text']

            # Clean up response: keep only the text after the last
            # instruction-closing tag when the output contains one.
            if '[/INST]' in output:
                answer = output.split('[/INST]')[-1].strip()
            else:
                answer = output.strip()

            st.success("✅ Recommendation:")
            st.markdown(f"**{answer}**")

else:
    st.info("📂 Please upload a factory CSV data file to begin analysis.")