import streamlit as st import pandas as pd import psycopg2 import os # Load DB credentials from Hugging Face secrets or environment variables DB_HOST = os.getenv("DB_HOST") DB_PORT = os.getenv("DB_PORT", "5432") DB_NAME = os.getenv("DB_NAME") DB_USER = os.getenv("DB_USER") DB_PASSWORD = os.getenv("DB_PASSWORD") @st.cache_data(ttl=600) def get_data(): try: conn = psycopg2.connect( host=DB_HOST, port=DB_PORT, dbname=DB_NAME, user=DB_USER, password=DB_PASSWORD, sslmode="require" ) query = "SELECT country, year, section, question_code, question_text, answer_code, answer_text FROM survey_info;" df = pd.read_sql_query(query, conn) conn.close() return df except Exception as e: st.error(f"Failed to connect to the database: {e}") st.stop() # Load data df = get_data() # Streamlit UI st.title("🌍 CGD Survey Explorer (Live DB)") st.sidebar.header("🔎 Filter Questions") # Multiselect filters with default = show all country_options = sorted(df["country"].dropna().unique()) year_options = sorted(df["year"].dropna().unique()) selected_countries = st.sidebar.multiselect("Select Country/Countries", country_options) selected_years = st.sidebar.multiselect("Select Year(s)", year_options) keyword = st.sidebar.text_input("Keyword Search", "") group_by_question = st.sidebar.checkbox("Group by Question Text") # Apply filters filtered = df[ (df["country"].isin(selected_countries) if selected_countries else True) & (df["year"].isin(selected_years) if selected_years else True) & (df["question_text"].str.contains(keyword, case=False, na=False)) ] # Output if group_by_question: st.subheader("📊 Grouped by Question Text") grouped = ( filtered.groupby("question_text") .agg({ "country": lambda x: sorted(set(x)), "year": lambda x: sorted(set(x)), "answer_text": lambda x: list(x)[:3] # preview up to 3 answers }) .reset_index() .rename(columns={ "country": "Countries", "year": "Years", "answer_text": "Sample Answers" }) ) st.dataframe(grouped) if grouped.empty: st.info("No questions found with current filters.") else: # Context-aware heading heading_parts = [] if selected_countries: heading_parts.append("Countries: " + ", ".join(selected_countries)) if selected_years: heading_parts.append("Years: " + ", ".join(map(str, selected_years))) if heading_parts: st.markdown("### Results for " + " | ".join(heading_parts)) else: st.markdown("### Results for All Countries and Years") st.dataframe(filtered[["country", "year", "question_text", "answer_text"]]) if filtered.empty: st.info("No matching questions found.")