# Spaces: Sleeping (page-status text captured with this file; not part of the program)
import functools
import io
import os

import pandas as pd
import requests
from pymongo import MongoClient
@functools.lru_cache(maxsize=1)
def get_mongo_client():
    """Return the ``tweets`` collection of the ``sentiment_db`` database.

    Despite the name, the return value is a collection handle, not the
    ``MongoClient`` itself.

    The client is created on the first call and the same collection handle
    is returned on every later call, so the process keeps a single client
    instead of opening a new one per call.

    The connection string is read from the ``SENTIMENT_MONGO_URI``
    environment variable when it is set; otherwise the built-in URI below
    is used.
    """
    # NOTE(review): the fallback URI embeds credentials, and the
    # "[email protected]" part looks like a scraping/obfuscation artifact --
    # confirm the real value and prefer supplying it via the environment.
    uri = os.environ.get(
        "SENTIMENT_MONGO_URI",
        "mongodb+srv://groupA:[email protected]/?retryWrites=true&w=majority&appName=SentimentCluster",
    )
    client = MongoClient(uri)
    return client["sentiment_db"]["tweets"]
def insert_data_if_empty():
    """Load the Sentiment140 CSV into MongoDB when the collection is empty.

    Does nothing if the collection already holds at least one document.
    Otherwise downloads the CSV, adds ``user`` and ``date`` columns filled
    with ``"Unknown"`` when they are missing, and inserts every row as one
    document.

    Any failure while downloading, parsing or inserting is reported with
    ``print`` and not re-raised (best-effort seeding). Returns ``None``.
    """
    collection = get_mongo_client()

    # limit=1 lets the server stop at the first match: we only need to know
    # whether anything exists, not how many documents there are.
    if collection.count_documents({}, limit=1):
        return

    # NOTE(review): the prefixes of the status messages below look like
    # mis-encoded emoji -- confirm against the original file before changing.
    print("π’ No data found. Inserting dataset...")
    csv_url = "https://huggingface.co/spaces/sharangrav24/SentimentAnalysis/resolve/main/sentiment140.csv"
    try:
        # Without a timeout a stalled connection would block forever.
        response = requests.get(csv_url, timeout=30)
        response.raise_for_status()

        # Parse the raw bytes so that ``encoding`` is actually applied by
        # pandas; with already-decoded text the argument has no effect and
        # the result depends on how requests guessed the charset.
        df = pd.read_csv(io.BytesIO(response.content), encoding="ISO-8859-1")

        # Add default fields if not present.
        for column in ("user", "date"):
            if column not in df.columns:
                df[column] = "Unknown"

        collection.insert_many(df.to_dict("records"))
        print("β Data Inserted into MongoDB!")
    except Exception as e:
        print(f"β Error loading dataset: {e}")
def get_entry_by_index(index=0):
    """Return the document found after skipping ``index`` entries.

    The query matches every document, excludes the ``_id`` field from the
    result, and applies no explicit sort. Returns the document as a dict,
    or ``None`` when no document exists at that position.
    """
    tweets = get_mongo_client()
    without_id = {"_id": 0}
    cursor = tweets.find({}, without_id).skip(index).limit(1)

    # The cursor is capped at one document, so the first item it yields
    # (if any) is the requested entry.
    for entry in cursor:
        return entry
    return None