File size: 2,917 Bytes
d3a33c8
 
 
23381bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35c1ade
1068edf
 
23381bb
4a806db
23381bb
 
 
 
4b5445a
23381bb
 
 
 
 
 
 
d3a33c8
443384f
 
 
4e71d04
 
 
 
 
81d02b5
443384f
81d02b5
4e71d04
81d02b5
4e71d04
 
81d02b5
 
 
443384f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4e71d04
443384f
 
 
 
 
 
 
 
 
 
4e71d04
443384f
81d02b5
443384f
 
23381bb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99

import streamlit as st
import pandas as pd
import psycopg2
import os

# Load DB credentials from Hugging Face secrets or environment variables
DB_HOST = os.getenv("DB_HOST")
DB_PORT = os.getenv("DB_PORT", "5432")
DB_NAME = os.getenv("DB_NAME")
DB_USER = os.getenv("DB_USER")
DB_PASSWORD = os.getenv("DB_PASSWORD")

@st.cache_data(ttl=600)
def get_data():
    try:
        conn = psycopg2.connect(
            host=DB_HOST,
            port=DB_PORT,
            dbname=DB_NAME,
            user=DB_USER,
            password=DB_PASSWORD,
            sslmode="require"

        )
        query = "SELECT country, year, section, question_code, question_text, answer_code, answer_text FROM survey_info;"
        df = pd.read_sql_query(query, conn)
        conn.close()
        return df
    except Exception as e:
        st.error(f"Failed to connect to the database: {e}")
        st.stop()

# Load data
df = get_data()

# Streamlit UI
st.title("๐ŸŒ CGD Survey Explorer (Live DB)")

st.sidebar.header("๐Ÿ”Ž Filter Questions")

# Multiselect filters with default = show all
country_options = sorted(df["country"].dropna().unique())
year_options = sorted(df["year"].dropna().unique())

selected_countries = st.sidebar.multiselect("Select Country/Countries", country_options)
selected_years = st.sidebar.multiselect("Select Year(s)", year_options)
keyword = st.sidebar.text_input("Keyword Search", "")
group_by_question = st.sidebar.checkbox("Group by Question Text")

# Apply filters
filtered = df[
    (df["country"].isin(selected_countries) if selected_countries else True) &
    (df["year"].isin(selected_years) if selected_years else True) &
    (df["question_text"].str.contains(keyword, case=False, na=False))
]

# Output
if group_by_question:
    st.subheader("๐Ÿ“Š Grouped by Question Text")

    grouped = (
        filtered.groupby("question_text")
        .agg({
            "country": lambda x: sorted(set(x)),
            "year": lambda x: sorted(set(x)),
            "answer_text": lambda x: list(x)[:3]  # preview up to 3 answers
        })
        .reset_index()
        .rename(columns={
            "country": "Countries",
            "year": "Years",
            "answer_text": "Sample Answers"
        })
    )

    st.dataframe(grouped)

    if grouped.empty:
        st.info("No questions found with current filters.")

else:
    # Context-aware heading
    heading_parts = []
    if selected_countries:
        heading_parts.append("Countries: " + ", ".join(selected_countries))
    if selected_years:
        heading_parts.append("Years: " + ", ".join(map(str, selected_years)))
    if heading_parts:
        st.markdown("### Results for " + " | ".join(heading_parts))
    else:
        st.markdown("### Results for All Countries and Years")

    st.dataframe(filtered[["country", "year", "question_text", "answer_text"]])

    if filtered.empty:
        st.info("No matching questions found.")