Spaces:

Garvitj
/

data

Sleeping

App Files Files Community

Garvitj committed on Apr 2

Commit

e85f31f

verified ·

1 Parent(s): 2d42813

Create app.py

Browse files

Files changed (1) hide show

app.py +139 -0

app.py ADDED Viewed

	@@ -0,0 +1,139 @@

+import streamlit as st
+import pandas as pd
+import json
+import plotly.express as px
+import re
+import io
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
+# Load DeepSeek Model
+tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B")
+model = AutoModelForCausalLM.from_pretrained("deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B")
+def query_deepseek(prompt):
+    """
+    Query the DeepSeek model and return the response.
+    """
+    inputs = tokenizer(prompt, return_tensors="pt")
+    with torch.no_grad():
+        outputs = model.generate(**inputs, max_new_tokens=150)
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    return response.strip()
+def extract_json(text):
+    """
+    Extract JSON from the DeepSeek response using regex.
+    """
+    match = re.search(r"\{.*\}", text, re.DOTALL)
+    if match:
+        try:
+            return json.loads(match.group(0))
+        except json.JSONDecodeError:
+            st.error("JSON Decode Error!")
+            return None
+    return None
+def get_visualization_suggestion(data):
+    """
+    Send dataset columns to DeepSeek and get suggestions for visualization.
+    """
+    prompt = f"""
+    I have the following dataset columns: {', '.join(data.columns)}.
+    Suggest the best type of visualization for this dataset.
+    Return only a valid JSON response in the following format:
+    {{
+        "x": "column_name",
+        "y": "column_name",
+        "chart_type": "bar/line/scatter/pie"
+    }}
+    """
+    response = query_deepseek(prompt)
+    return extract_json(response)
+def extract_csv_from_response(response):
+    """
+    Dynamically extract CSV data from a response string.
+    """
+    lines = response.splitlines()
+    csv_data = [line.strip() for line in lines if '"' in line and ',' in line]
+    return '\n'.join(csv_data) if csv_data else None
+def generate_demo_data_csv(user_input, num_rows=10):
+    """Generates realistic demo data using the LLM in valid CSV format."""
+    prompt = f"""
+    Generate a structured dataset with {num_rows} rows based on the following request:
+    "{user_input}"
+    Ensure the response is in valid CSV format, with column headers and quoted text values.
+    """
+    response = query_deepseek(prompt).strip()
+    csv_data = extract_csv_from_response(response)
+    if csv_data:
+        try:
+            df = pd.read_csv(io.StringIO(csv_data))
+            file_path = "generated_data.csv"
+            df.to_csv(file_path, index=False)
+            return "Demo data generated as CSV.", file_path
+        except Exception as e:
+            return f"Error: Invalid CSV format. {str(e)}", None
+    else:
+        return "Error: No valid CSV data found in the response.", None
+def query_sql_generator(user_query):
+    """Generate SQL queries from natural language."""
+    prompt = f"I just want a SQL Query corresponding to: {user_query} and no explanation."
+    return query_deepseek(prompt)
+# Streamlit UI
+st.set_page_config(page_title="AI-Powered Dashboard", layout="wide")
+st.title("🤖 AI-Powered Multi-Feature Dashboard")
+# Sidebar for navigation
+st.sidebar.title("Navigation")
+option = st.sidebar.radio("Select Feature", ["📊 Data Visualization", "🧠 SQL Query Generator", "📄 Demo Data Generator"])
+if option == "📊 Data Visualization":
+    uploaded_file = st.file_uploader("Upload your CSV file", type=["csv"])
+    if uploaded_file is not None:
+        df = pd.read_csv(uploaded_file)
+        st.write("### Preview of Data")
+        st.dataframe(df.head())
+        with st.spinner("Getting visualization suggestions from DeepSeek..."):
+            suggestion = get_visualization_suggestion(df)
+        if suggestion:
+            chart_type, x_col, y_col = suggestion.get("chart_type"), suggestion.get("x"), suggestion.get("y")
+            if x_col not in df.columns or y_col not in df.columns:
+                st.error("DeepSeek suggested invalid column names.")
+            else:
+                st.write(f"### Suggested Chart: {chart_type.capitalize()} Chart")
+                chart_map = {
+                    "bar": px.bar,
+                    "line": px.line,
+                    "scatter": px.scatter,
+                    "pie": lambda df, x, y: px.pie(df, names=x, values=y)
+                }
+                if chart_type in chart_map:
+                    fig = chart_map[chart_type](df, x=x_col, y=y_col, title=f"{x_col} vs {y_col}")
+                    st.plotly_chart(fig)
+                else:
+                    st.error("Unsupported chart type suggested.")
+elif option == "🧠 SQL Query Generator":
+    text_input = st.text_area("Enter your Query here in Plain English:")
+    if st.button("Generate SQL Query"):
+        with st.spinner("Generating SQL Query..."):
+            st.write(query_sql_generator(text_input))
+elif option == "📄 Demo Data Generator":
+    user_input = st.text_area("Describe the dataset you want:")
+    num_rows = st.number_input("Number of rows", min_value=1, max_value=1000, value=10)
+    if st.button("Generate Dataset"):
+        with st.spinner("Generating Demo Data..."):
+            message, file_path = generate_demo_data_csv(user_input, num_rows)
+        st.write(message)
+        if file_path:
+            st.download_button("Download CSV", open(file_path, "rb"), file_name="generated_data.csv", mime="text/csv")