Spaces:

Ehrii
/

sentiment-analysis

Running

File size: 2,100 Bytes

import os

# Set Hugging Face cache to a writable location.
# The Hugging Face libraries resolve their cache directories when they are
# first imported, so these variables must be exported BEFORE `transformers`
# and `huggingface_hub` are imported; assigning them afterwards is too late
# for the import-time defaults.
os.environ["HF_HOME"] = "/tmp/huggingface"
os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface"
os.makedirs(os.environ["HF_HOME"], exist_ok=True)

# These imports intentionally follow the environment setup above.
from fastapi import FastAPI, HTTPException  # noqa: E402
from pydantic import BaseModel  # noqa: E402
from transformers import pipeline, AutoTokenizer  # noqa: E402
from langdetect import detect, DetectorFactory  # noqa: E402
from huggingface_hub import hf_hub_download  # noqa: E402

# Ensure consistent language detection results: langdetect is seeded so the
# same text always yields the same detected language.
DetectorFactory.seed = 0

# ASGI application object; the route decorators below register on it.
app = FastAPI()

# Load the tokenizer of the multilingual base model. It is handed explicitly
# to the fine-tuned pipeline below instead of letting the pipeline resolve a
# tokenizer from the fine-tuned repository.
original_tokenizer = AutoTokenizer.from_pretrained("tabularisai/multilingual-sentiment-analysis")

# Dynamically download fine-tuned model (avoid storage issues).
# NOTE: the pipeline is built from the repository id, not from MODEL_PATH;
# MODEL_PATH only records the local path of the downloaded weights file.
try:
    MODEL_PATH = hf_hub_download(repo_id="johndoee/sentiment", filename="pytorch_model.bin")
    multilingual_model = pipeline(
        "sentiment-analysis",
        model="johndoee/sentiment",
        tokenizer=original_tokenizer
    )
except Exception as e:
    # Fail fast at import time with a readable message, and chain the original
    # exception so the underlying cause stays attached to the traceback.
    raise RuntimeError(f"❌ Error loading model 'johndoee/sentiment': {e}") from e

# Dedicated classifier for English input; analyze_sentiment routes text
# detected as "en" to this pipeline.
english_model = pipeline(
    task="sentiment-analysis",
    model="siebert/sentiment-roberta-large-english",
)

class SentimentRequest(BaseModel):
    """Request body accepted by the POST /analyze/ endpoint."""

    # Raw text whose language is detected and whose sentiment is classified.
    text: str

class SentimentResponse(BaseModel):
    """Response body returned by the POST /analyze/ endpoint."""

    # The submitted text, echoed back unchanged.
    original_text: str
    # Language code produced by detect_language, or "unknown" if detection failed.
    language_detected: str
    # Lower-cased label of the first prediction from the selected model.
    sentiment: str
    # Score the model reported for that label.
    confidence_score: float

def detect_language(text):
    """Return the language code langdetect reports for *text*.

    Any failure raised by the detector is swallowed on purpose and reported
    as the string "unknown", so callers always receive a string.
    """
    try:
        language_code = detect(text)
    except Exception:
        language_code = "unknown"
    return language_code

@app.get("/")
def home():
    """Root endpoint: return a fixed message confirming the API is up."""
    status_payload = {"message": "Sentiment Analysis API is running!"}
    return status_payload

@app.post("/analyze/", response_model=SentimentResponse)
def analyze_sentiment(request: SentimentRequest):
    """Detect the language of the submitted text and classify its sentiment.

    Text detected as English ("en") is scored by ``english_model``; every
    other result, including "unknown", is scored by ``multilingual_model``.

    Args:
        request: Request body carrying the text to analyse.

    Returns:
        SentimentResponse with the original text, the detected language, the
        lower-cased predicted label and its score.

    Raises:
        HTTPException: 400 if the text is empty or only whitespace.
    """
    text = request.text

    # Reject blank input up front: there is nothing meaningful to classify,
    # and language detection cannot work on it either.
    if not text.strip():
        raise HTTPException(status_code=400, detail="Text must not be empty.")

    language = detect_language(text)

    # Choose the appropriate model based on language
    classifier = english_model if language == "en" else multilingual_model

    # truncation=True clips inputs that exceed the model's maximum sequence
    # length instead of letting the pipeline fail on long texts.
    top_prediction = classifier(text, truncation=True)[0]

    return SentimentResponse(
        original_text=text,
        language_detected=language,
        sentiment=top_prediction["label"].lower(),
        confidence_score=top_prediction["score"],
    )