"""Load the Sentiment140 CSV into MongoDB and summarise the stored tweets."""

import functools
import io
import os

import pandas as pd
import requests
from pymongo import MongoClient
from pymongo.collection import Collection

# SECURITY NOTE(review): this connection string embeds credentials in source.
# It is kept only as a backward-compatible fallback; prefer setting the
# MONGO_URI environment variable and rotate the password committed here.
_DEFAULT_MONGO_URI = "mongodb+srv://groupA:pythongroupA@sentimentcluster.4usfj.mongodb.net/?retryWrites=true&w=majority&appName=SentimentCluster"

_DATASET_URL = "https://huggingface.co/spaces/sharangrav24/SentimentAnalysis/resolve/main/sentiment140.csv"

# Without a timeout, requests.get can block forever on a stalled connection.
_REQUEST_TIMEOUT_SECONDS = 60

# Maps the stringified ``target`` value of a document to a readable label.
_SENTIMENT_LABELS = {"0": "Negative", "2": "Neutral", "4": "Positive"}


@functools.lru_cache(maxsize=1)
def _get_client() -> MongoClient:
    """Return the process-wide MongoClient, creating it on first use.

    A MongoClient owns a connection pool, so one instance is reused instead
    of building (and never closing) a new client on every call.
    """
    return MongoClient(os.environ.get("MONGO_URI", _DEFAULT_MONGO_URI))


def get_mongo_client() -> Collection:
    """Return the ``tweets`` collection of the ``sentiment_db`` database.

    Despite its name, this returns a collection handle rather than the
    client itself. The connection string is read from the ``MONGO_URI``
    environment variable when set, otherwise the built-in default is used.
    """
    return _get_client()["sentiment_db"]["tweets"]


def insert_data_if_empty() -> None:
    """Download the dataset and insert it when the collection has no documents.

    Does nothing when the collection already contains at least one document.
    This is a best-effort operation: any failure while downloading, parsing
    or inserting is reported on stdout instead of being raised.
    """
    collection = get_mongo_client()
    if collection.count_documents({}) != 0:
        return

    print("🟢 No data found. Inserting dataset...")
    try:
        response = requests.get(_DATASET_URL, timeout=_REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()
        # Parse the raw bytes so that ``encoding`` is actually honoured;
        # ``response.text`` would already be decoded with whatever charset
        # requests guessed, making the encoding argument a no-op.
        df = pd.read_csv(io.BytesIO(response.content), encoding="ISO-8859-1")
        collection.insert_many(df.to_dict("records"))
        print("✅ Data Inserted into MongoDB!")
    except Exception as e:  # deliberate best-effort boundary: report, don't crash
        print(f"❌ Error loading dataset: {e}")


def get_dataset_summary() -> str:
    """Return a one-line summary of tweet counts per sentiment target.

    Documents are grouped by their ``target`` field. Targets ``0``, ``2``
    and ``4`` are shown as Negative, Neutral and Positive; any other value
    is shown as its string form.

    Returns:
        A string such as ``"Total tweets: 3. Negative: 1, Positive: 2."``,
        or ``"Total tweets: 0."`` when the collection has no documents.
    """
    collection = get_mongo_client()
    pipeline = [
        {"$group": {"_id": "$target", "count": {"$sum": 1}}},
        # $group output order is unspecified; sort so the summary is stable.
        {"$sort": {"_id": 1}},
    ]

    total = 0
    summary_parts = []
    for doc in collection.aggregate(pipeline):
        target = str(doc["_id"])
        count = doc["count"]
        total += count
        label = _SENTIMENT_LABELS.get(target, target)
        summary_parts.append(f"{label}: {count}")

    if not summary_parts:
        # Avoid the dangling " ." that joining an empty list would produce.
        return "Total tweets: 0."
    return f"Total tweets: {total}. " + ", ".join(summary_parts) + "."