# File size: 3,510 Bytes
# e107ee4
import streamlit as st
import pandas as pd
from pymongo import MongoClient
from dotenv import load_dotenv
import os
import json
import re
# 1. Load environment variables
load_dotenv()
# The connection string is read from the conventionally named MONGODB_URI
# variable first. The original code looked up "MONGODB_UR" (apparently a typo),
# so that spelling is still honoured as a fallback to keep existing setups
# working.
# SECURITY NOTE(review): the hard-coded default below embeds credentials in
# source control. It is kept unchanged for backward compatibility, but it
# should be moved to the environment and the password rotated.
MONGODB_URI = os.getenv("MONGODB_URI") or os.getenv(
    "MONGODB_UR",
    "mongodb+srv://milind:[email protected]/?retryWrites=true&w=majority&appName=Cluster0",
)
# 2. Create MongoDB connection
client = MongoClient(MONGODB_URI)
db = client["novascholar_db"]
# Default collection used by filter_and_export_collection_to_csv when the
# caller does not pass one.
collection = db["research_papers"]
def convert_mixed_columns(df: pd.DataFrame) -> pd.DataFrame:
    """
    Flatten list-valued columns into plain text so the frame exports
    cleanly to CSV.

    A column is rewritten only when at least one of its non-null cells is a
    ``list``. In such a column every list becomes its items joined with
    ", ", every other non-null cell becomes ``str(cell)``, and null cells
    become the empty string. All other columns are left untouched.

    The frame is modified in place and the same object is returned.
    """

    def _as_text(cell):
        # Lists are checked first so pd.notna is never called on a list.
        if isinstance(cell, list):
            return ", ".join(str(item) for item in cell)
        if pd.notna(cell):
            return str(cell)
        return ""

    for name in df.columns:
        present = df[name].dropna()
        holds_list = any(isinstance(cell, list) for cell in present)
        if not holds_list:
            continue
        df[name] = df[name].apply(_as_text)
    return df
def filter_and_export_collection_to_csv(
    keyword: str, doc_collection=None, *, use_regex: bool = False
):
    """
    Export the documents whose 'Keywords' field contains ``keyword`` to CSV.

    Args:
        keyword: Text to search for in the 'Keywords' field. The match is
            case-insensitive and unanchored (it may occur anywhere in the
            field).
        doc_collection: Object exposing a pymongo-style ``find`` method.
            When ``None``, the module-level ``collection`` is used.
        use_regex: When ``False`` (default) ``keyword`` is escaped with
            ``re.escape`` so characters such as ``+``, ``(`` or ``.`` are
            matched literally instead of being interpreted (or rejected) as
            regular-expression syntax. Pass ``True`` to send ``keyword`` to
            MongoDB as a raw regular expression.

    Returns:
        A ``(df, csv_filename)`` tuple. When at least one document matches,
        ``df`` holds the matches (list columns flattened to strings) and
        ``csv_filename`` is the relative path the CSV was written to. When
        nothing matches, an empty DataFrame and ``None`` are returned and no
        file is written.
    """
    # Use the default 'research_papers' collection if none provided
    if doc_collection is None:
        doc_collection = collection

    # The keyword comes from free-form user input; escape it unless the
    # caller explicitly asked for regex semantics.
    pattern = keyword if use_regex else re.escape(keyword)
    query = {"Keywords": {"$regex": pattern, "$options": "i"}}
    docs = list(doc_collection.find(query))

    if not docs:
        # Return an empty DataFrame if no documents found
        return pd.DataFrame(), None

    df = convert_mixed_columns(pd.DataFrame(docs))
    csv_filename = "papers_filtered_export.csv"
    df.to_csv(csv_filename, index=False)
    return df, csv_filename
def main():
    """
    Render the Streamlit page for keyword-filtered CSV export.

    The page shows a paper-type selector and a keyword text box. When the
    export button is pressed, the paper type is turned into a collection
    name (lower-cased, spaces replaced by underscores), that collection is
    filtered via ``filter_and_export_collection_to_csv``, and the result is
    previewed. Any exception raised during the export is shown as an error
    message on the page.
    """
    # st.set_page_config(page_title="Filter and Export Papers", layout="wide")
    st.title("Filter and Export Papers by Keyword")

    # Paper types the user can choose from; each maps to its own collection.
    paper_type_options = [
        "Review Based Paper",
        "Opinion/Perspective Based Paper",
        "Empirical Research Paper",
        "Research Paper (Other)",
    ]
    paper_type = st.selectbox("Select type of research paper:", paper_type_options)

    # Keyword to look for in each document's 'Keywords' field.
    keyword_input = st.text_input(
        "Enter the exact keyword to filter papers by 'Keywords' field:"
    )

    # Nothing more to do until the user asks for an export.
    if not st.button("Export Filtered Papers to CSV"):
        return

    with st.spinner("Exporting filtered documents..."):
        try:
            # Derive the collection name from the selected paper type.
            collection_name = paper_type.lower().replace(" ", "_")
            target_collection = db[collection_name]
            df, csv_filename = filter_and_export_collection_to_csv(
                keyword_input, target_collection
            )
            if df.empty or not csv_filename:
                st.warning("No matching documents found for that keyword.")
            else:
                st.success(
                    f"Successfully exported filtered papers to {csv_filename}!"
                )
                st.write("Preview of the filtered DataFrame:")
                st.dataframe(df)
        except Exception as exc:
            st.error(f"Error exporting filtered papers: {exc!s}")
# Run the page only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|