File size: 2,858 Bytes
e107ee4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import streamlit as st
import pandas as pd
from pymongo import MongoClient
from dotenv import load_dotenv
import os

# 1. Load environment variables
load_dotenv()

# Read the connection string from MONGODB_URI. The previously used (misspelled)
# key "MONGODB_UR" is still honoured as a fallback so existing setups keep
# working.
# NOTE(review): the default below embeds credentials in source code. They
# should be rotated and supplied only through the environment.
MONGODB_URI = os.getenv("MONGODB_URI") or os.getenv(
    "MONGODB_UR",
    "mongodb+srv://milind:[email protected]/?retryWrites=true&w=majority&appName=Cluster0",
)

# 2. Create MongoDB connection
client = MongoClient(MONGODB_URI)
db = client["novascholar_db"]
collection = db["research_papers"]


def get_collection_data(paper_type: str):
    """
    Return every document stored in the collection for the given paper type.

    The collection name is the paper type lower-cased with spaces replaced by
    underscores. Each document's ``_id`` is converted to a string. If anything
    goes wrong, the error is displayed with ``st.error`` and ``None`` is
    returned.
    """
    try:
        # Map the human-readable paper type onto its collection
        target = db[paper_type.lower().replace(" ", "_")]

        records = []
        for record in target.find():
            # Store the ObjectId as a plain string
            record["_id"] = str(record["_id"])
            records.append(record)
    except Exception as exc:
        st.error(f"Database Error: {exc!s}")
        return None
    else:
        return records


def main():
    """
    Render the Streamlit page for fetching and downloading a paper collection.

    The user selects a paper type and clicks "Fetch Data". The matching
    documents are shown in a table together with a button that downloads them
    as a CSV file. Nothing is returned; all output goes to the Streamlit page.
    """
    st.title("MongoDB Collection Download")
    st.write("Download all documents from the selected research paper collection")

    # Dropdown to select the type of research paper
    paper_type = st.selectbox(
        "Select type of research paper:",
        [
            "Review Based Paper",
            "Opinion/Perspective Based Paper",
            "Empirical Research Paper",
            "Research Paper (Other)",
        ],
    )

    if not st.button("Fetch Data"):
        return

    # Keep the spinner around the database round-trip only
    with st.spinner("Retrieving documents from MongoDB..."):
        docs = get_collection_data(paper_type)

    if docs is None:
        # get_collection_data has already displayed the database error;
        # adding a "no documents" warning on top of it would be misleading.
        return

    if not docs:
        st.warning(f"No documents found in the '{paper_type}' collection.")
        return

    # Convert to DataFrame
    df = pd.DataFrame(docs)

    # Convert lists to comma-separated strings for consistency
    for col in df.columns:
        if df[col].apply(lambda x: isinstance(x, list)).any():
            df[col] = df[col].apply(
                lambda x: ", ".join(map(str, x)) if isinstance(x, list) else x
            )

    st.success(
        f"Successfully retrieved {len(df)} documents from '{paper_type}' collection."
    )
    st.dataframe(df)

    # "/" is a path separator, so it is replaced as well as spaces to keep
    # the suggested download name a single valid file name.
    file_stem = paper_type.replace(" ", "_").replace("/", "_").lower()

    # Provide option to download the data as CSV
    csv = df.to_csv(index=False).encode("utf-8")
    st.download_button(
        label="Download CSV",
        data=csv,
        file_name=f"{file_stem}_papers.csv",
        mime="text/csv",
    )


if __name__ == "__main__":
    main()