data / app.py
Garvitj's picture
Update app.py
1b53616 verified
import streamlit as st
import pandas as pd
import json
import subprocess
try:
    import plotly.express as px
except ModuleNotFoundError:
    # plotly is missing from this environment: install it on the fly, then retry.
    import importlib
    import sys

    # Run pip through the interpreter executing this script so the package is
    # installed where the retry import below will look for it (a bare "pip" on
    # PATH may belong to a different Python). check=True raises
    # CalledProcessError if the install itself fails.
    subprocess.run([sys.executable, "-m", "pip", "install", "plotly"], check=True)
    # Make the import system notice the freshly installed package.
    importlib.invalidate_caches()
    import plotly.express as px
import re
import io
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
# Load DeepSeek Model
MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"


# Streamlit re-executes this whole script on every widget interaction, so the
# loader is cached: the tokenizer and model are created once and reused.
# show_spinner=False keeps the cache from rendering a UI element here, before
# st.set_page_config() is called further down in this script.
@st.cache_resource(show_spinner=False)
def _load_deepseek():
    """Load and return the ``(tokenizer, model)`` pair for ``MODEL_NAME``."""
    loaded_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    loaded_model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
    return loaded_tokenizer, loaded_model


# Module-level names used by query_deepseek().
tokenizer, model = _load_deepseek()
def query_deepseek(prompt, max_new_tokens=150):
    """
    Query the DeepSeek model and return only the newly generated text.

    Args:
        prompt: Text sent to the model.
        max_new_tokens: Upper bound on the number of tokens to generate.
            Defaults to 150, the value this function previously hard-coded.

    Returns:
        The decoded continuation with special tokens removed and surrounding
        whitespace stripped. The prompt itself is not included.
    """
    inputs = tokenizer(prompt, return_tensors="pt")
    prompt_length = inputs["input_ids"].shape[1]
    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
    # For a causal LM, generate() returns the prompt tokens followed by the
    # continuation. Drop the prompt so callers that parse the reply (JSON/CSV
    # extraction) do not pick up text from the instructions they sent.
    generated_tokens = outputs[0][prompt_length:]
    response = tokenizer.decode(generated_tokens, skip_special_tokens=True)
    return response.strip()
def extract_json(text):
    """
    Extract a JSON object from the DeepSeek response.

    The text is scanned for top-level ``{...}`` spans that parse as JSON.
    When several are present, the last one is returned, so an object that
    appears after earlier text (such as an example format) wins.

    Args:
        text: Raw model response, possibly with prose around the JSON.

    Returns:
        The parsed object as a dict, or None when no JSON object is found.
        If the text contains an opening brace but nothing parses, a Streamlit
        error is shown before returning None.
    """
    decoder = json.JSONDecoder()
    parsed = None
    saw_brace = False
    position = text.find("{")
    while position != -1:
        saw_brace = True
        try:
            candidate, end = decoder.raw_decode(text, position)
        except json.JSONDecodeError:
            # Not valid JSON from this brace; try the next opening brace.
            position = text.find("{", position + 1)
        else:
            parsed = candidate
            # Resume after the parsed object so nested objects are not
            # mistaken for separate top-level results.
            position = text.find("{", end)
    if parsed is None and saw_brace:
        st.error("JSON Decode Error!")
    return parsed
def get_visualization_suggestion(data):
    """
    Send dataset columns to DeepSeek and get suggestions for visualization.

    Args:
        data: Object exposing a ``columns`` iterable (a DataFrame in this app).

    Returns:
        Whatever ``extract_json`` yields for the model's reply: a dict that is
        expected to carry the "x", "y" and "chart_type" keys requested in the
        prompt, or None when no JSON could be extracted.
    """
    # Column labels are not necessarily strings (e.g. integer labels), and
    # str.join raises TypeError on non-string items, so convert each one first.
    column_list = ', '.join(map(str, data.columns))
    prompt = f"""
I have the following dataset columns: {column_list}.
Suggest the best type of visualization for this dataset.
Return only a valid JSON response in the following format:
{{
"x": "column_name",
"y": "column_name",
"chart_type": "bar/line/scatter/pie"
}}
"""
    response = query_deepseek(prompt)
    return extract_json(response)
def extract_csv_from_response(response):
    """
    Pull the CSV-looking lines out of a response string.

    A line is kept only when it contains both a double quote and a comma;
    kept lines are stripped of surrounding whitespace and joined with
    newlines.

    Returns:
        The joined CSV text, or None when no line qualifies.
    """
    kept_rows = []
    for raw_line in response.splitlines():
        if ',' in raw_line and '"' in raw_line:
            kept_rows.append(raw_line.strip())
    if not kept_rows:
        return None
    return '\n'.join(kept_rows)
def generate_demo_data_csv(user_input, num_rows=10):
    """
    Ask the LLM for a demo dataset and save it as a CSV file.

    Args:
        user_input: Free-text description of the dataset to generate.
        num_rows: Number of rows requested in the prompt.

    Returns:
        A ``(message, file_path)`` tuple. On success ``file_path`` is
        "generated_data.csv"; on any failure it is None and ``message``
        describes the error.
    """
    prompt = f"""
Generate a structured dataset with {num_rows} rows based on the following request:
"{user_input}"
Ensure the response is in valid CSV format, with column headers and quoted text values.
"""
    raw_reply = query_deepseek(prompt).strip()
    csv_text = extract_csv_from_response(raw_reply)

    # Guard clause: nothing CSV-like came back from the model.
    if not csv_text:
        return "Error: No valid CSV data found in the response.", None

    try:
        # Round-trip through pandas so malformed CSV is rejected before saving.
        frame = pd.read_csv(io.StringIO(csv_text))
        out_path = "generated_data.csv"
        frame.to_csv(out_path, index=False)
    except Exception as exc:
        return f"Error: Invalid CSV format. {str(exc)}", None
    return "Demo data generated as CSV.", out_path
def query_sql_generator(user_query):
    """
    Turn a plain-English request into a SQL query via the model.

    Args:
        user_query: Natural-language description of the desired query.

    Returns:
        The text returned by ``query_deepseek`` for the SQL prompt.
    """
    sql_prompt = f"I just want a SQL Query corresponding to: {user_query} and no explanation."
    sql_text = query_deepseek(sql_prompt)
    return sql_text
# Streamlit UI
# Feature labels are defined once so the sidebar options and the branch
# conditions below cannot drift apart.
FEATURE_VISUALIZATION = "📊 Data Visualization"
FEATURE_SQL = "🧠 SQL Query Generator"
FEATURE_DEMO_DATA = "📄 Demo Data Generator"


def _pie_chart(frame, x, y, **kwargs):
    """Adapt px.pie to the (frame, x=, y=, **kwargs) call shape of the other charts."""
    return px.pie(frame, names=x, values=y, **kwargs)


# Maps the chart type named by the model to the plotly builder for it.
CHART_BUILDERS = {
    "bar": px.bar,
    "line": px.line,
    "scatter": px.scatter,
    "pie": _pie_chart,
}

st.set_page_config(page_title="AI-Powered Dashboard", layout="wide")
st.title("🤖 AI-Powered Multi-Feature Dashboard")

# Sidebar for navigation
st.sidebar.title("Navigation")
option = st.sidebar.radio(
    "Select Feature",
    [FEATURE_VISUALIZATION, FEATURE_SQL, FEATURE_DEMO_DATA],
)

if option == FEATURE_VISUALIZATION:
    uploaded_file = st.file_uploader("Upload your CSV file", type=["csv"])
    if uploaded_file is not None:
        df = pd.read_csv(uploaded_file)
        st.write("### Preview of Data")
        st.dataframe(df.head())
        with st.spinner("Getting visualization suggestions from DeepSeek..."):
            suggestion = get_visualization_suggestion(df)
        if suggestion:
            x_col = suggestion.get("x")
            y_col = suggestion.get("y")
            raw_chart_type = suggestion.get("chart_type")
            # The model may omit the key or vary the casing; normalise so a
            # missing value cannot crash and "Bar" still matches "bar".
            chart_type = raw_chart_type.strip().lower() if isinstance(raw_chart_type, str) else ""
            columns_ok = (
                isinstance(x_col, str)
                and isinstance(y_col, str)
                and x_col in df.columns
                and y_col in df.columns
            )
            if not columns_ok:
                st.error("DeepSeek suggested invalid column names.")
            elif chart_type not in CHART_BUILDERS:
                st.error("Unsupported chart type suggested.")
            else:
                st.write(f"### Suggested Chart: {chart_type.capitalize()} Chart")
                fig = CHART_BUILDERS[chart_type](df, x=x_col, y=y_col, title=f"{x_col} vs {y_col}")
                st.plotly_chart(fig)
elif option == FEATURE_SQL:
    text_input = st.text_area("Enter your Query here in Plain English:")
    if st.button("Generate SQL Query"):
        with st.spinner("Generating SQL Query..."):
            st.write(query_sql_generator(text_input))
elif option == FEATURE_DEMO_DATA:
    user_input = st.text_area("Describe the dataset you want:")
    num_rows = st.number_input("Number of rows", min_value=1, max_value=1000, value=10)
    if st.button("Generate Dataset"):
        with st.spinner("Generating Demo Data..."):
            message, file_path = generate_demo_data_csv(user_input, num_rows)
        st.write(message)
        if file_path:
            # Read the bytes and close the file right away instead of handing
            # an open handle to the widget and never closing it.
            with open(file_path, "rb") as csv_file:
                csv_bytes = csv_file.read()
            st.download_button("Download CSV", csv_bytes, file_name="generated_data.csv", mime="text/csv")