Spaces:

SPJIMR-Internship
/

SPJIMR_FlipClassroom_RCopilot_ResearchInternship

Sleeping

App Files Files Community

SPJIMR_FlipClassroom_RCopilot_ResearchInternship / entire_download.py

YashJD

Initial Commit

e107ee4 6 months ago

raw

history blame contribute delete

2.86 kB

	import streamlit as st
	import pandas as pd
	from pymongo import MongoClient
	from dotenv import load_dotenv
	import os

	# 1. Load environment variables (python-dotenv reads a local .env file if present)
	load_dotenv()

	# The original lookup used the misspelled key "MONGODB_UR", so a correctly
	# named MONGODB_URI setting was ignored. Read the correct name first and keep
	# the misspelled one as a fallback for deployments that already rely on it.
	#
	# NOTE(review): the fallback below embeds credentials in source control.
	# Rotate that password and drop the default once every deployment sets
	# MONGODB_URI explicitly.
	MONGODB_URI = os.getenv("MONGODB_URI") or os.getenv(
	    "MONGODB_UR",
	    "mongodb+srv://milind:[email protected]/?retryWrites=true&w=majority&appName=Cluster0",
	)

	# 2. Create MongoDB connection
	client = MongoClient(MONGODB_URI)
	db = client["novascholar_db"]
	# Not referenced by the code visible in this file; kept because other
	# modules may import it -- TODO confirm before removing.
	collection = db["research_papers"]


	def get_collection_data(paper_type: str):
	    """
	    Load every document stored for the given paper type.

	    The collection name is derived from ``paper_type`` by lower-casing it and
	    turning spaces into underscores. Each returned document has its ``_id``
	    replaced by the string form of that value.

	    Returns the list of documents, or ``None`` if any exception was raised
	    (in which case the error is shown to the user via ``st.error``).
	    """
	    try:
	        # Resolve the target collection from the paper type
	        target = db[paper_type.lower().replace(" ", "_")]

	        # Materialise the full cursor
	        records = list(target.find())

	        # Replace each _id with its string form
	        for record in records:
	            record["_id"] = str(record["_id"])
	    except Exception as e:
	        st.error(f"Database Error: {str(e)}")
	        return None
	    else:
	        return records


	def _flatten_list_columns(df):
	    """Replace list cells in *df* (in place) with comma-separated strings and return it."""
	    for col in df.columns:
	        # Only touch columns that actually contain at least one list value.
	        if df[col].apply(lambda x: isinstance(x, list)).any():
	            df[col] = df[col].apply(
	                lambda x: ", ".join(map(str, x)) if isinstance(x, list) else x
	            )
	    return df


	def main():
	    """
	    Render the Streamlit page for downloading a research paper collection.

	    The user picks a paper type and presses "Fetch Data"; the matching
	    documents are fetched with ``get_collection_data``, shown as a table and
	    offered as a CSV download.
	    """
	    st.title("MongoDB Collection Download")
	    st.write("Download all documents from the selected research paper collection")

	    # Dropdown to select the type of research paper
	    paper_type = st.selectbox(
	        "Select type of research paper:",
	        [
	            "Review Based Paper",
	            "Opinion/Perspective Based Paper",
	            "Empirical Research Paper",
	            "Research Paper (Other)",
	        ],
	    )

	    if not st.button("Fetch Data"):
	        return

	    with st.spinner("Retrieving documents from MongoDB..."):
	        docs = get_collection_data(paper_type)

	    if docs is None:
	        # get_collection_data has already reported the failure via st.error;
	        # do not additionally claim that the collection is empty.
	        return

	    if not docs:
	        st.warning(f"No documents found in the '{paper_type}' collection.")
	        return

	    # Convert to DataFrame, with lists rendered as comma-separated strings
	    # for consistency in the table and the CSV.
	    df = _flatten_list_columns(pd.DataFrame(docs))
	    st.success(
	        f"Successfully retrieved {len(df)} documents from '{paper_type}' collection."
	    )
	    st.dataframe(df)

	    # "/" (as in "Opinion/Perspective Based Paper") is a path separator and
	    # must not appear in the suggested download file name.
	    file_stem = paper_type.replace(" ", "_").replace("/", "_").lower()

	    # Provide option to download the data as CSV
	    csv = df.to_csv(index=False).encode("utf-8")
	    st.download_button(
	        label="Download CSV",
	        data=csv,
	        file_name=f"{file_stem}_papers.csv",
	        mime="text/csv",
	    )


	# Run the page only when this file is executed as the entry script.
	if __name__ == "__main__":
	    main()