# Spaces: Sleeping
import io
import os

import pandas as pd
import requests
from pymongo import MongoClient
# Fallback connection string, used only when MONGO_URI is unset or empty.
# NOTE(review): credentials are embedded in this literal; prefer supplying
# MONGO_URI from the environment and rotating this password.
_DEFAULT_MONGO_URI = "mongodb+srv://groupA:[email protected]/?retryWrites=true&w=majority&appName=SentimentCluster"

# Process-wide client, created lazily on first use and reused afterwards.
_mongo_client = None


def get_mongo_client():
    """Return the ``tweets`` collection of the ``sentiment_db`` database.

    Despite the name, the return value is a collection, not a client.
    The underlying ``MongoClient`` is created once and cached at module
    level, so repeated calls share one connection pool instead of opening
    (and never closing) a new one per call.

    The connection string is read from the ``MONGO_URI`` environment
    variable when it is set and non-empty; otherwise the built-in default
    is used.
    """
    global _mongo_client
    if _mongo_client is None:
        uri = os.environ.get("MONGO_URI") or _DEFAULT_MONGO_URI
        _mongo_client = MongoClient(uri)
    return _mongo_client["sentiment_db"]["tweets"]
def insert_data_if_empty():
    """Seed the tweets collection from the hosted CSV if it has no documents.

    Does nothing when the collection already contains at least one
    document. Otherwise downloads the CSV, adds ``user`` and ``date``
    columns filled with ``"Unknown"`` when they are missing, and inserts
    every row as one document.

    Download, parsing and insert failures are reported on stdout and not
    re-raised, so a failed seed does not abort the caller.
    """
    collection = get_mongo_client()
    # limit=1 lets the server stop at the first document; only emptiness
    # matters here, not the exact count.
    if collection.count_documents({}, limit=1) != 0:
        return

    print("π’ No data found. Inserting dataset...")
    csv_url = "https://huggingface.co/spaces/sharangrav24/SentimentAnalysis/resolve/main/sentiment140.csv"
    try:
        # Without a timeout, requests can wait indefinitely on a stalled
        # connection. This bounds connect time and each read wait; it is
        # not a limit on the total download time.
        response = requests.get(csv_url, timeout=30)
        response.raise_for_status()
        # Parse the raw bytes so that ``encoding`` is actually applied.
        # ``response.text`` is already decoded with whatever charset
        # requests guessed, which makes read_csv's ``encoding`` a no-op.
        df = pd.read_csv(io.BytesIO(response.content), encoding="ISO-8859-1")
        # Add default fields if not present.
        for column in ("user", "date"):
            if column not in df.columns:
                df[column] = "Unknown"
        collection.insert_many(df.to_dict("records"))
        print("β Data Inserted into MongoDB!")
    except Exception as e:
        # Deliberately broad: seeding is best-effort and must not crash
        # the caller, whatever the failure source (HTTP, CSV, MongoDB).
        print(f"β Error loading dataset: {e}")
def get_dataset_summary():
    """Return a one-line summary of tweet counts grouped by ``target``.

    Documents are grouped on their ``target`` field. Values ``0``, ``2``
    and ``4`` are shown as ``Negative``, ``Neutral`` and ``Positive``;
    any other value is shown as its string form.

    Returns:
        A string such as
        ``"Total tweets: 5. Negative: 2, Positive: 3."``, or
        ``"Total tweets: 0."`` when the collection has no documents.
    """
    collection = get_mongo_client()
    pipeline = [
        {"$group": {"_id": "$target", "count": {"$sum": 1}}},
        # $group does not order its output; sort so that the summary text
        # is the same on every call.
        {"$sort": {"_id": 1}},
    ]
    mapping = {"0": "Negative", "2": "Neutral", "4": "Positive"}

    total = 0
    summary_parts = []
    for doc in collection.aggregate(pipeline):
        target = str(doc["_id"])
        count = doc["count"]
        total += count
        summary_parts.append(f"{mapping.get(target, target)}: {count}")

    if not summary_parts:
        # Avoid the malformed "Total tweets: 0. ." for an empty collection.
        return f"Total tweets: {total}."
    return f"Total tweets: {total}. " + ", ".join(summary_parts) + "."
def get_entry_by_index(index=0):
    """Return the document at position ``index``, or ``None`` if out of range.

    Documents are ordered by ascending ``_id`` so that a given index
    refers to the same document on every call; without an explicit sort
    MongoDB does not guarantee result order. The ``_id`` field itself is
    excluded from the returned document.

    Args:
        index: Zero-based position of the document to fetch.

    Returns:
        The document as a dict, or ``None`` when ``index`` is negative or
        past the last document.
    """
    # A negative skip is rejected by the cursor; treat it like any other
    # out-of-range index instead of raising.
    if index < 0:
        return None
    collection = get_mongo_client()
    cursor = collection.find({}, {"_id": 0}).sort("_id", 1).skip(index).limit(1)
    docs = list(cursor)
    return docs[0] if docs else None