"""Streamlit dashboard backed by a local DeepSeek model.

Features: chart suggestions for an uploaded CSV, natural-language to SQL
generation, and demo dataset generation.
"""

import io
import json
import re
import subprocess
import sys

import pandas as pd
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

try:
    import plotly.express as px
except ModuleNotFoundError:
    # Install with the interpreter that is running this app; a bare "pip" on
    # PATH may belong to a different environment. check=True surfaces a failed
    # install instead of falling through to a confusing second import error.
    subprocess.run([sys.executable, "-m", "pip", "install", "plotly"], check=True)
    import plotly.express as px

# Streamlit UI configuration. Kept ahead of every other Streamlit call because
# Streamlit documents set_page_config as the first command of a page.
st.set_page_config(page_title="AI-Powered Dashboard", layout="wide")

MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
GENERATED_CSV_PATH = "generated_data.csv"


@st.cache_resource
def _load_model(model_name=MODEL_NAME):
    """Load the tokenizer and model once and reuse them across script reruns."""
    loaded_tokenizer = AutoTokenizer.from_pretrained(model_name)
    loaded_model = AutoModelForCausalLM.from_pretrained(model_name)
    return loaded_tokenizer, loaded_model


# Load DeepSeek Model
tokenizer, model = _load_model()


def query_deepseek(prompt, max_new_tokens=150):
    """
    Query the DeepSeek model and return only the newly generated text.

    Args:
        prompt: Text sent to the model.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The decoded completion, stripped of surrounding whitespace.
    """
    inputs = tokenizer(prompt, return_tensors="pt")
    prompt_length = inputs["input_ids"].shape[-1]
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
    # A causal LM returns prompt tokens followed by the completion. Drop the
    # prompt so its own text (e.g. the JSON template or quoted request) is not
    # mistaken for model output by the parsers below.
    completion_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(completion_tokens, skip_special_tokens=True).strip()


def extract_json(text):
    """
    Extract the first decodable JSON object from a model response.

    Every "{" is tried as a potential start so that prose or stray braces
    around the object do not prevent decoding.

    Args:
        text: Raw response text.

    Returns:
        The decoded dict, or None when no JSON object can be decoded. An error
        is shown in the UI when the text contained a "{" but nothing decoded.
    """
    decoder = json.JSONDecoder()
    start = text.find("{")
    saw_brace = start != -1
    while start != -1:
        try:
            candidate, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            start = text.find("{", start + 1)
            continue
        return candidate
    if saw_brace:
        st.error("JSON Decode Error!")
    return None


def get_visualization_suggestion(data):
    """
    Send dataset columns to DeepSeek and get suggestions for visualization.

    Args:
        data: DataFrame whose column labels are listed in the prompt.

    Returns:
        The dict parsed from the response, or None when parsing fails.
    """
    # Column labels may be non-strings (e.g. integers); str.join needs strings.
    column_list = ", ".join(map(str, data.columns))
    prompt = f"""
    I have the following dataset columns: {column_list}.
    Suggest the best type of visualization for this dataset.
    Return only a valid JSON response in the following format:
    {{
        "x": "column_name",
        "y": "column_name",
        "chart_type": "bar/line/scatter/pie"
    }}
    """
    response = query_deepseek(prompt)
    return extract_json(response)


def extract_csv_from_response(response):
    """
    Dynamically extract CSV data from a response string.

    Keeps only the lines that contain both a double quote and a comma.

    Returns:
        The kept lines joined by newlines, or None when no line qualifies.
    """
    csv_lines = [
        line.strip()
        for line in response.splitlines()
        if '"' in line and "," in line
    ]
    return "\n".join(csv_lines) if csv_lines else None


def generate_demo_data_csv(user_input, num_rows=10):
    """
    Generate demo data using the LLM and save it as a CSV file.

    Args:
        user_input: Free-text description of the desired dataset.
        num_rows: Number of rows requested from the model.

    Returns:
        A (message, file_path) tuple; file_path is None on failure.
    """
    prompt = f"""
    Generate a structured dataset with {num_rows} rows based on the following request:
    "{user_input}"
    Ensure the response is in valid CSV format, with column headers and quoted text values.
    """
    csv_data = extract_csv_from_response(query_deepseek(prompt))
    if not csv_data:
        return "Error: No valid CSV data found in the response.", None

    try:
        df = pd.read_csv(io.StringIO(csv_data))
        df.to_csv(GENERATED_CSV_PATH, index=False)
    except Exception as e:
        # Deliberately broad: any parsing or writing failure is reported to
        # the user as a message rather than crashing the page.
        return f"Error: Invalid CSV format. {str(e)}", None
    return "Demo data generated as CSV.", GENERATED_CSV_PATH


def query_sql_generator(user_query):
    """Generate SQL queries from natural language."""
    prompt = f"I just want a SQL Query corresponding to: {user_query} and no explanation."
    return query_deepseek(prompt)


def _pie_chart(df, x, y, **kwargs):
    """Adapt px.pie to the (df, x=..., y=..., **kwargs) call used for all charts."""
    return px.pie(df, names=x, values=y, **kwargs)


CHART_BUILDERS = {
    "bar": px.bar,
    "line": px.line,
    "scatter": px.scatter,
    "pie": _pie_chart,
}


def _is_column(df, name):
    """Return True when name is a column of df; unhashable values count as absent."""
    try:
        return name in df.columns
    except TypeError:
        return False


def _render_suggested_chart(df, suggestion):
    """Validate the model's suggestion and draw the corresponding chart."""
    chart_type = suggestion.get("chart_type")
    x_col = suggestion.get("x")
    y_col = suggestion.get("y")

    if not (_is_column(df, x_col) and _is_column(df, y_col)):
        st.error("DeepSeek suggested invalid column names.")
        return

    # The model may omit chart_type or return a non-string value.
    if not isinstance(chart_type, str):
        st.error("Unsupported chart type suggested.")
        return

    chart_type = chart_type.strip().lower()
    st.write(f"### Suggested Chart: {chart_type.capitalize()} Chart")
    builder = CHART_BUILDERS.get(chart_type)
    if builder is None:
        st.error("Unsupported chart type suggested.")
        return

    fig = builder(df, x=x_col, y=y_col, title=f"{x_col} vs {y_col}")
    st.plotly_chart(fig)


def _render_visualization_page():
    """Upload a CSV, preview it, and plot the chart suggested by the model."""
    uploaded_file = st.file_uploader("Upload your CSV file", type=["csv"])
    if uploaded_file is None:
        return

    df = pd.read_csv(uploaded_file)
    st.write("### Preview of Data")
    st.dataframe(df.head())

    with st.spinner("Getting visualization suggestions from DeepSeek..."):
        suggestion = get_visualization_suggestion(df)

    if suggestion:
        _render_suggested_chart(df, suggestion)
    else:
        st.warning("DeepSeek did not return a usable visualization suggestion.")


def _render_sql_page():
    """Turn a plain-English request into a SQL query."""
    text_input = st.text_area("Enter your Query here in Plain English:")
    if not st.button("Generate SQL Query"):
        return
    if not text_input.strip():
        st.warning("Please enter a query first.")
        return
    with st.spinner("Generating SQL Query..."):
        st.write(query_sql_generator(text_input))


def _render_demo_data_page():
    """Generate a demo dataset and offer it for download."""
    user_input = st.text_area("Describe the dataset you want:")
    num_rows = st.number_input("Number of rows", min_value=1, max_value=1000, value=10)
    if not st.button("Generate Dataset"):
        return
    if not user_input.strip():
        st.warning("Please describe the dataset first.")
        return

    with st.spinner("Generating Demo Data..."):
        message, file_path = generate_demo_data_csv(user_input, num_rows)
    st.write(message)
    if file_path:
        # Read the bytes inside a context manager so the handle is closed.
        with open(file_path, "rb") as csv_file:
            csv_bytes = csv_file.read()
        st.download_button(
            "Download CSV",
            csv_bytes,
            file_name="generated_data.csv",
            mime="text/csv",
        )


# Streamlit UI
st.title("🤖 AI-Powered Multi-Feature Dashboard")

# Sidebar for navigation
st.sidebar.title("Navigation")
option = st.sidebar.radio(
    "Select Feature",
    ["📊 Data Visualization", "🧠 SQL Query Generator", "📄 Demo Data Generator"],
)

if option == "📊 Data Visualization":
    _render_visualization_page()
elif option == "🧠 SQL Query Generator":
    _render_sql_page()
elif option == "📄 Demo Data Generator":
    _render_demo_data_page()