Spaces:
Running
Running
File size: 3,155 Bytes
d3a33c8 23381bb 35c1ade 1068edf 23381bb 4a806db 23381bb 4b5445a 23381bb d3a33c8 cc9cf8b 4e71d04 fad7dca 9cb0a2b fad7dca cc9cf8b 81d02b5 4e71d04 81d02b5 4e71d04 cca0254 9cb0a2b cca0254 81d02b5 cc9cf8b 4e71d04 cc9cf8b 4e71d04 cc9cf8b 81d02b5 cc9cf8b 23381bb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 |
import streamlit as st
import pandas as pd
import psycopg2
import os
# Load DB credentials from Hugging Face secrets or environment variables
DB_HOST = os.getenv("DB_HOST")
DB_PORT = os.getenv("DB_PORT", "5432")
DB_NAME = os.getenv("DB_NAME")
DB_USER = os.getenv("DB_USER")
DB_PASSWORD = os.getenv("DB_PASSWORD")
@st.cache_data(ttl=600)
def get_data():
try:
conn = psycopg2.connect(
host=DB_HOST,
port=DB_PORT,
dbname=DB_NAME,
user=DB_USER,
password=DB_PASSWORD,
sslmode="require"
)
query = "SELECT country, year, section, question_code, question_text, answer_code, answer_text FROM survey_info;"
df = pd.read_sql_query(query, conn)
conn.close()
return df
except Exception as e:
st.error(f"Failed to connect to the database: {e}")
st.stop()
# Load data
df = get_data()
# Streamlit UI
st.title("๐ CGD Survey Explorer (Live DB)")
st.sidebar.header("๐ Filter Questions")
# Multiselect filters with default = show all
country_options = sorted(df["country"].dropna().unique())
year_options = sorted(df["year"].dropna().unique())
selected_countries = st.sidebar.multiselect("Select Country/Countries", country_options)
selected_years = st.sidebar.multiselect("Select Year(s)", year_options)
keyword = st.sidebar.text_input(
"Keyword Search (Question text / Answer text / Question code)", ""
) #NEW
group_by_question = st.sidebar.checkbox("Group by Question Text")
# Apply filters
filtered = df[
(df["country"].isin(selected_countries) if selected_countries else True) &
(df["year"].isin(selected_years) if selected_years else True) &
(
df["question_text"].str.contains(keyword, case=False, na=False) |
df["answer_text"].str.contains(keyword, case=False, na=False) |
df["question_code"].astype(str).str.contains(keyword, case=False, na=False) # NEW
)
]
# Output
if group_by_question:
st.subheader("๐ Grouped by Question Text")
grouped = (
filtered.groupby("question_text")
.agg({
"country": lambda x: sorted(set(x)),
"year": lambda x: sorted(set(x)),
"answer_text": lambda x: list(x)[:3] # preview up to 3 answers
})
.reset_index()
.rename(columns={
"country": "Countries",
"year": "Years",
"answer_text": "Sample Answers"
})
)
st.dataframe(grouped)
if grouped.empty:
st.info("No questions found with current filters.")
else:
# Context-aware heading
heading_parts = []
if selected_countries:
heading_parts.append("Countries: " + ", ".join(selected_countries))
if selected_years:
heading_parts.append("Years: " + ", ".join(map(str, selected_years)))
if heading_parts:
st.markdown("### Results for " + " | ".join(heading_parts))
else:
st.markdown("### Results for All Countries and Years")
st.dataframe(filtered[["country", "year", "question_text", "answer_text"]])
if filtered.empty:
st.info("No matching questions found.")
|