import os
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import pipeline, AutoTokenizer
from langdetect import detect, DetectorFactory

# Ensure consistent language detection results
DetectorFactory.seed = 0

# Point the Hugging Face cache at a writable location.
# TRANSFORMERS_CACHE is deprecated in recent transformers releases in favor of
# HF_HOME, but both are set here for compatibility with older versions.
os.environ["HF_HOME"] = "/tmp/huggingface"
os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface"

# Create the cache directory with appropriate permissions
cache_dir = os.environ["HF_HOME"]
os.makedirs(cache_dir, exist_ok=True)
os.chmod(cache_dir, 0o755)  # rwx for owner, read/execute for group and others

# Retrieve Hugging Face token from environment variable
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    raise RuntimeError("Hugging Face token is missing! Please set the HF_TOKEN environment variable.")

app = FastAPI()

# Model names
MULTILINGUAL_MODEL_NAME = "Ehrii/sentiment"
ENGLISH_MODEL_NAME = "siebert/sentiment-roberta-large-english"

# Load multilingual sentiment model
try:
    multilingual_tokenizer = AutoTokenizer.from_pretrained(
        MULTILINGUAL_MODEL_NAME,
        token=HF_TOKEN,  # Use 'token' instead of deprecated 'use_auth_token'
        cache_dir=cache_dir
    )
    multilingual_model = pipeline(
        "sentiment-analysis",
        model=MULTILINGUAL_MODEL_NAME,
        tokenizer=multilingual_tokenizer,
        token=HF_TOKEN,  # Use 'token' instead of deprecated 'use_auth_token'
        model_kwargs={"cache_dir": cache_dir},  # forward cache_dir to from_pretrained via model_kwargs
    )
except Exception as e:
    raise RuntimeError(f"Failed to load multilingual model: {e}") from e

# Load English sentiment model
try:
    english_model = pipeline(
        "sentiment-analysis",
        model=ENGLISH_MODEL_NAME,
        token=HF_TOKEN,  # Use 'token' instead of deprecated 'use_auth_token'
        model_kwargs={"cache_dir": cache_dir},  # forward cache_dir to from_pretrained via model_kwargs
    )
except Exception as e:
    raise RuntimeError(f"Failed to load English sentiment model: {e}") from e

class SentimentRequest(BaseModel):
    text: str

class SentimentResponse(BaseModel):
    original_text: str
    language_detected: str
    sentiment: str
    confidence_score: float

def detect_language(text):
    """Detect the language of the given text."""
    try:
        return detect(text)
    except Exception:
        return "unknown"

@app.get("/")
def home():
    return {"message": "Sentiment Analysis API is running!"}

@app.post("/analyze/", response_model=SentimentResponse)
def analyze_sentiment(request: SentimentRequest):
    text = request.text.strip()
    if not text:
        raise HTTPException(status_code=400, detail="Text input cannot be empty.")
    
    language = detect_language(text)
    # Choose the appropriate model based on detected language
    model = english_model if language == "en" else multilingual_model
    result = model(text, truncation=True)  # truncate inputs longer than the model's maximum sequence length
    
    return SentimentResponse(
        original_text=text,
        language_detected=language,
        sentiment=result[0]["label"].lower(),
        confidence_score=result[0]["score"],
    )