# Spaces:
# Sleeping
# Sleeping
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import streamlit as st
import torch
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
# Page setup: wide layout and page title, followed by the visible heading.
st.set_page_config(
    layout="wide",
    page_title="Smart Factory RAG Assistant",
)
# NOTE(review): the first character of the heading looks like a mis-decoded
# emoji; it is kept as found because the intended glyph cannot be recovered
# from this file - confirm against the original source.
st.title(
    "π Industry 5.0 | Smart Factory RAG Assistant (Open Source)"
)
# Load the open-source model (Mistral-7B-Instruct)
@st.cache_resource
def load_model():
    """Build the text-generation pipeline for Mistral-7B-Instruct-v0.2.

    The result is cached with ``st.cache_resource`` because Streamlit
    re-executes the whole script on every widget interaction; without the
    cache each rerun would load the tokenizer and model weights again.

    Returns:
        A ``transformers`` ``"text-generation"`` pipeline wrapping the model
        (loaded as float16 with ``device_map="auto"``) and its tokenizer.
    """
    model_id = "mistralai/Mistral-7B-Instruct-v0.2"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.float16,
        device_map="auto",
    )
    return pipeline("text-generation", model=model, tokenizer=tokenizer)
def _plot_time_series(df: pd.DataFrame, value_column: str, time_column: str):
    """Draw ``value_column`` as a line plot, ordered by ``time_column`` if usable.

    Args:
        df: Uploaded data frame; it is not modified.
        value_column: Name of the column plotted on the y-axis.
        time_column: Column to parse as datetimes for the x-axis, or the
            sentinel ``"Index"`` to plot against the row index.

    Returns:
        The matplotlib figure holding the plot. The caller renders and
        closes it.
    """
    fig, ax = plt.subplots(figsize=(10, 4))
    timestamps = None
    if time_column != "Index":
        try:
            # Parse into a separate Series: overwriting df[time_column] would
            # change that column's dtype for every later analysis step.
            timestamps = pd.to_datetime(df[time_column])
        except (ValueError, TypeError, OverflowError) as exc:
            st.warning(
                f"Could not parse '{time_column}' as datetimes ({exc}); "
                "plotting against the row index instead."
            )
    if timestamps is not None:
        ordered = pd.DataFrame(
            {"time": timestamps, "value": df[value_column]}
        ).sort_values(by="time")
        ax.plot(ordered["time"], ordered["value"])
        ax.set_xlabel(time_column)
    else:
        ax.plot(df[value_column])
        ax.set_xlabel("Index")
    ax.set_title(f"Trend Over Time: {value_column}")
    ax.set_ylabel(value_column)
    return fig


def _detect_anomalies(df: pd.DataFrame, contamination=0.05, random_state=42):
    """Label rows with an Isolation Forest fitted on standardized numeric data.

    Only rows whose numeric columns are all non-null take part. The labels
    are returned as a Series carrying those rows' index so the caller can
    align them with ``df`` even when some rows were dropped.

    Args:
        df: Uploaded data frame; it is not modified.
        contamination: Passed to ``IsolationForest``.
        random_state: Fixed seed so repeated script reruns give the same labels.

    Returns:
        A Series of ``fit_predict`` labels (the script treats ``-1`` as an
        anomaly), or ``None`` when there is no complete numeric row to fit on.
    """
    numeric_rows = df.select_dtypes(include="number").dropna()
    if numeric_rows.empty:
        return None
    scaled = StandardScaler().fit_transform(numeric_rows)
    forest = IsolationForest(
        contamination=contamination, random_state=random_state
    )
    return pd.Series(forest.fit_predict(scaled), index=numeric_rows.index)


def _build_prompt(role, summary, corr_text, anomaly_count, question):
    """Assemble the ``[INST]``-wrapped prompt sent to the text-generation pipeline."""
    context = (
        "\n"
        f"You are a highly skilled {role} working in a smart manufacturing facility.\n"
        "Here is a summary of the uploaded data:\n"
        "STATISTICAL SUMMARY:\n"
        f"{summary}\n"
        "PARAMETER CORRELATIONS:\n"
        f"{corr_text}\n"
        "ANOMALY DETECTION:\n"
        f"{anomaly_count} anomalies detected using Isolation Forest method.\n"
        "Based on this context, answer the following question in a clear, "
        "technically accurate manner and suggest best decisions from the "
        f"point of view of a {role}.\n"
        f"QUESTION: {question}\n"
        "ANSWER:\n"
    )
    return f"<s>[INST] {context} [/INST]"


nlp = load_model()

# NOTE(review): several UI strings below start with characters that look like
# mis-decoded emoji; they are kept as found - confirm against the original.

# File Upload
uploaded_file = st.file_uploader("π Upload your factory CSV data", type=["csv"])
if uploaded_file is not None:
    try:
        df = pd.read_csv(uploaded_file)
    except (
        pd.errors.EmptyDataError,
        pd.errors.ParserError,
        UnicodeDecodeError,
    ) as exc:
        st.error(f"Could not read the uploaded CSV: {exc}")
        st.stop()
    if df.empty:
        st.warning("The uploaded CSV contains no data.")
        st.stop()
    st.success("β File uploaded and loaded!")

    # Custom column selection for preview
    st.subheader("π Data Preview")
    all_columns = df.columns.tolist()
    selected_columns = st.multiselect(
        "Select columns to preview", all_columns, default=all_columns[:5]
    )
    st.dataframe(df[selected_columns].head())

    # Descriptive Stats
    st.subheader("π Descriptive Statistics")
    st.dataframe(df.describe().T)

    # Correlation Analysis
    st.subheader("π Parameter Correlation Heatmap")
    corr = df.corr(numeric_only=True)
    if corr.empty:
        # No numeric columns: there is nothing to draw a heatmap from.
        st.info("No numeric columns available for a correlation heatmap.")
    else:
        fig, ax = plt.subplots(figsize=(10, 6))
        sns.heatmap(corr, annot=True, cmap="coolwarm", fmt=".2f", ax=ax)
        st.pyplot(fig)
        # Close after rendering so figures do not pile up across reruns.
        plt.close(fig)

    # Technical Visualizations
    st.subheader("π Technical Graphs")
    numeric_columns = df.select_dtypes(include="number").columns.tolist()
    if numeric_columns:
        # Time Series Plot
        selected_graph_column = st.selectbox(
            "Select a parameter for time series plot", numeric_columns
        )
        time_column = st.selectbox(
            "Select time/index column (optional)",
            ["Index"] + df.columns.tolist(),
            index=0,
        )
        fig2 = _plot_time_series(df, selected_graph_column, time_column)
        st.pyplot(fig2)
        plt.close(fig2)

        # Pairplot
        if len(numeric_columns) > 1:
            st.subheader("π Pairwise Parameter Relationships")
            # Cap the pairplot at 200 rows to keep rendering time bounded.
            if len(df) > 200:
                sampled_df = df[numeric_columns].sample(n=200, random_state=1)
            else:
                sampled_df = df[numeric_columns]
            pair_grid = sns.pairplot(sampled_df)
            # pairplot returns a PairGrid; render its underlying figure.
            st.pyplot(pair_grid.figure)
            plt.close(pair_grid.figure)

        # Boxplots
        st.subheader("π Distribution & Outliers per Parameter")
        selected_box_column = st.selectbox(
            "Select parameter for boxplot", numeric_columns
        )
        fig3, ax3 = plt.subplots()
        sns.boxplot(y=df[selected_box_column], ax=ax3)
        ax3.set_title(f"Boxplot: {selected_box_column}")
        st.pyplot(fig3)
        plt.close(fig3)
    else:
        st.info("No numeric columns found; skipping parameter graphs.")

    # Anomaly Detection
    st.subheader("β οΈ Anomaly Detection using Isolation Forest")
    anomaly_labels = _detect_anomalies(df)
    if anomaly_labels is None:
        st.info(
            "Anomaly detection needs at least one row with complete numeric "
            "values; skipping."
        )
        anomalies = df.iloc[0:0]
    else:
        # Index-aligned assignment: rows excluded because of missing values
        # get NaN instead of causing a length mismatch.
        df["Anomaly"] = anomaly_labels
        anomalies = df[df["Anomaly"] == -1]
    st.write(f"Detected {len(anomalies)} anomalies")
    st.dataframe(anomalies.head(10))

    # Role-based Assistant
    st.subheader("π§ Role-Based Decision Assistant")
    role = st.selectbox("Select your role", ["Engineer", "Operator"])
    question = st.text_input("Ask a question based on the data analysis")
    if question:
        with st.spinner("Generating insights..."):
            prompt = _build_prompt(
                role=role,
                summary=df.describe().to_string(),
                corr_text=corr.to_string(),
                anomaly_count=len(anomalies),
                question=question,
            )
            output = nlp(
                prompt, max_new_tokens=512, do_sample=True, temperature=0.5
            )[0]["generated_text"]
            # Clean up response: keep only the text after the instruction marker.
            if "[/INST]" in output:
                answer = output.split("[/INST]")[-1].strip()
            else:
                answer = output.strip()
        st.success("β Recommendation:")
        st.markdown(f"**{answer}**")
else:
    st.info("π Please upload a factory CSV data file to begin analysis.")