Spaces:
Sleeping
Sleeping
import pandas as pd | |
import requests | |
import io | |
from pymongo import MongoClient | |
# Function to connect to MongoDB and return the "tweets" collection of "sentiment_db".
def get_mongo_client():
    """Return the ``tweets`` collection of the ``sentiment_db`` database.

    Despite the name, the return value is a collection handle, not the
    client itself.

    The ``MongoClient`` is built on the first call and stored on the
    function object, so later calls share that one client instead of
    constructing a new one each time.  The connection string is taken from
    the ``MONGODB_URI`` environment variable when it is set and non-empty;
    otherwise the URI embedded below is used.

    Returns:
        The ``sentiment_db["tweets"]`` collection.
    """
    import os  # local import so this block does not depend on file-level imports

    # NOTE(review): this literal looks mangled -- "[email protected]" sits where
    # the password and cluster host would normally be.  Confirm against the
    # real deployment, and prefer supplying the URI through MONGODB_URI so
    # credentials are not kept in source.
    default_uri = "mongodb+srv://groupA:[email protected]/?retryWrites=true&w=majority&appName=SentimentCluster"

    client = getattr(get_mongo_client, "_client", None)
    if client is None:
        client = MongoClient(os.environ.get("MONGODB_URI") or default_uri)
        # Cache only after successful construction so a failed attempt is retried.
        get_mongo_client._client = client
    return client["sentiment_db"]["tweets"]
# Function to insert data if the collection is empty. | |
def insert_data_if_empty():
    """Populate the tweets collection from the hosted CSV if it is empty.

    When the collection already holds at least one document, nothing is
    done.  Otherwise the CSV at the hard-coded Hugging Face URL is
    downloaded, parsed with pandas as ISO-8859-1, and every row is inserted
    as one document.

    Download, parse, and insert failures are reported with ``print`` and
    not re-raised (best effort).  Errors from the initial emptiness check
    are not caught here.

    Returns:
        None.
    """
    collection = get_mongo_client()
    # Only emptiness matters, so cap the count at one document instead of
    # counting the whole collection.
    if collection.count_documents({}, limit=1) != 0:
        return

    # NOTE(review): the leading characters of the messages below look like
    # mis-encoded emoji; they are kept unchanged -- confirm against the
    # original file.
    print("π’ No data found. Inserting dataset...")
    csv_url = "https://huggingface.co/spaces/sharangrav24/SentimentAnalysis/resolve/main/sentiment140.csv"
    try:
        # A timeout keeps a stalled connection from blocking forever.
        response = requests.get(csv_url, timeout=60)
        response.raise_for_status()
        # Hand pandas the raw bytes so that ``encoding`` actually controls
        # decoding; with already-decoded text (response.text) the encoding
        # argument has no effect.
        df = pd.read_csv(io.BytesIO(response.content), encoding="ISO-8859-1")
        collection.insert_many(df.to_dict("records"))
        print("β Data Inserted into MongoDB!")
    except Exception as e:
        # Deliberately broad: any failure here is reported, not fatal.
        print(f"β Error loading dataset: {e}")
# Function to get dataset summary from MongoDB. | |
def get_dataset_summary():
    """Return a one-line text summary of tweet counts per sentiment target.

    Documents are grouped by their ``target`` field and counted.  Target
    values ``0``, ``2`` and ``4`` are shown as ``Negative``, ``Neutral``
    and ``Positive``; any other value is shown as its string form.

    Returns:
        A string such as
        ``"Total tweets: 10. Negative: 4, Positive: 6."``, or
        ``"Total tweets: 0."`` when the collection has no documents.
    """
    collection = get_mongo_client()
    pipeline = [
        # Count documents per distinct ``target`` value.
        {"$group": {"_id": "$target", "count": {"$sum": 1}}},
        # $group does not guarantee an output order; sort so the summary
        # text is stable from one call to the next.
        {"$sort": {"_id": 1}},
    ]
    # Map the sentiment target values to labels.
    mapping = {"0": "Negative", "2": "Neutral", "4": "Positive"}

    summary_parts = []
    total = 0
    for doc in collection.aggregate(pipeline):
        # Keys in ``mapping`` are strings, so normalise the group key first.
        target = str(doc["_id"])
        count = doc["count"]
        total += count
        summary_parts.append(f"{mapping.get(target, target)}: {count}")

    if not summary_parts:
        # Avoid the dangling " ." that joining an empty list would produce.
        return f"Total tweets: {total}."
    return f"Total tweets: {total}. " + ", ".join(summary_parts) + "."