"""MongoDB helpers for loading and querying the sentiment tweet dataset."""

import functools
import io
import os

import pandas as pd
import requests
from pymongo import MongoClient

# NOTE(security): this fallback URI embeds credentials in source control. It is
# kept only so existing deployments keep working; set the MONGODB_URI
# environment variable instead and rotate the password committed here.
_DEFAULT_MONGO_URI = "mongodb+srv://groupA:pythongroupA@sentimentcluster.4usfj.mongodb.net/?retryWrites=true&w=majority&appName=SentimentCluster"
_CSV_URL = "https://huggingface.co/spaces/sharangrav24/SentimentAnalysis/resolve/main/sentiment140.csv"

# requests has no default timeout; without one a stalled server blocks forever.
_REQUEST_TIMEOUT_SECONDS = 60

# Number of rows converted to dicts and sent to MongoDB per insert_many call.
_INSERT_BATCH_SIZE = 10_000

# Display labels for the stringified values of the "target" field.
_TARGET_LABELS = {"0": "Negative", "2": "Neutral", "4": "Positive"}


@functools.lru_cache(maxsize=1)
def _get_client():
    """Return the process-wide MongoClient, creating it on first use.

    MongoClient owns a connection pool and is meant to be created once per
    process; building a new one per call leaks pools and re-does the
    connection handshake every time.
    """
    return MongoClient(os.environ.get("MONGODB_URI", _DEFAULT_MONGO_URI))


def get_mongo_client():
    """Return the ``tweets`` collection of the ``sentiment_db`` database.

    Despite the name (kept for backward compatibility), the return value is a
    pymongo collection, not a client. The underlying client is shared across
    calls. The connection string is read from the ``MONGODB_URI`` environment
    variable when set, otherwise the built-in default is used.
    """
    db = _get_client()["sentiment_db"]
    return db["tweets"]


def insert_data_if_empty():
    """Populate the tweets collection from the remote CSV if it is empty.

    Does nothing when the collection already holds at least one document.
    Download, parsing and insertion errors are reported on stdout and not
    re-raised (best-effort load); errors from the initial emptiness check
    propagate to the caller.
    """
    collection = get_mongo_client()
    # find_one stops at the first document; count_documents({}) would count
    # the entire collection just to compare the result with zero.
    if collection.find_one({}, {"_id": 1}) is not None:
        return

    print("🟢 No data found. Inserting dataset...")
    try:
        response = requests.get(_CSV_URL, timeout=_REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()
        # Parse the raw bytes so the declared encoding is actually applied.
        # Passing already-decoded text makes read_csv ignore ``encoding`` and
        # rely on whatever charset requests guessed for the response.
        df = pd.read_csv(io.BytesIO(response.content), encoding="ISO-8859-1")

        # Add default fields if not present.
        for column in ("user", "date"):
            if column not in df.columns:
                df[column] = "Unknown"

        if df.empty:
            # insert_many rejects an empty document list; report it plainly.
            print("⚠️ Dataset is empty. Nothing to insert.")
            return

        # Insert in slices so the whole dataset is never held in memory as a
        # list of dicts at once.
        for start in range(0, len(df), _INSERT_BATCH_SIZE):
            batch = df.iloc[start:start + _INSERT_BATCH_SIZE]
            collection.insert_many(batch.to_dict("records"))
        print("✅ Data Inserted into MongoDB!")
    except Exception as e:
        # Deliberate catch-all boundary: loading is best-effort and the
        # caller is only informed through the printed message.
        print(f"❌ Error loading dataset: {e}")


def _format_summary(groups):
    """Build the summary sentence from ``{"_id": target, "count": n}`` docs."""
    parts = []
    total = 0
    for doc in groups:
        target = str(doc["_id"])
        count = doc["count"]
        total += count
        # Unknown targets are shown verbatim rather than dropped.
        parts.append(f"{_TARGET_LABELS.get(target, target)}: {count}")
    if not parts:
        # Avoid the dangling "Total tweets: 0. ." produced by joining nothing.
        return "Total tweets: 0."
    return f"Total tweets: {total}. " + ", ".join(parts) + "."


def get_dataset_summary():
    """Return a one-line summary of tweet counts grouped by ``target``.

    Returns:
        A string such as ``"Total tweets: 3. Negative: 1, Positive: 2."``,
        or ``"Total tweets: 0."`` when the collection is empty. Groups are
        listed in ascending ``target`` order.
    """
    collection = get_mongo_client()
    pipeline = [
        {"$group": {"_id": "$target", "count": {"$sum": 1}}},
        # $group output order is unspecified; sort so the text is stable.
        {"$sort": {"_id": 1}},
    ]
    return _format_summary(collection.aggregate(pipeline))


def get_entry_by_index(index=0):
    """Return the document at position ``index``, without its ``_id``.

    No sort is applied, so positions follow the order in which the server
    returns documents for an unfiltered query.

    Args:
        index: Zero-based position of the document to fetch.

    Returns:
        The document as a dict, or ``None`` when ``index`` is negative or
        past the end of the collection.
    """
    if index < 0:
        # A negative skip is rejected by pymongo; treat it as out of range,
        # the same way an index past the end is handled.
        return None
    collection = get_mongo_client()
    return collection.find_one({}, {"_id": 0}, skip=index)