File size: 2,793 Bytes
95fc527
416991a
20712aa
4d93fa1
25b797a
20712aa
25b797a
 
 
9ba2aea
7c93a3d
 
604a2b6
7c93a3d
604a2b6
 
95fc527
416991a
 
 
 
 
589cfa5
 
 
20712aa
 
4d93fa1
09a08ee
9ba2aea
416991a
1294d13
416991a
 
604a2b6
9ba2aea
604a2b6
 
9ba2aea
416991a
 
 
589cfa5
416991a
 
 
1294d13
4d93fa1
416991a
604a2b6
 
589cfa5
604a2b6
416991a
 
20712aa
 
 
 
 
b147674
 
 
 
 
25b797a
416991a
b147674
25b797a
1294d13
b147674
20712aa
 
 
 
 
b147674
 
416991a
 
 
604a2b6
b147674
54b27c2
 
4d93fa1
54b27c2
4d93fa1
604a2b6
b147674
 
 
25b797a
 
1d29239
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import os
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import pipeline, AutoTokenizer
from langdetect import detect, DetectorFactory

# Ensure consistent language detection results — langdetect's algorithm is
# probabilistic, so seeding its RNG makes detect() deterministic across runs.
DetectorFactory.seed = 0

# Set Hugging Face cache directory to a writable location (/tmp works on
# read-only container filesystems, e.g. hosted inference environments).
os.environ["HF_HOME"] = "/tmp/huggingface_cache"
os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface_cache"  # legacy variable; kept for older transformers versions

# Create cache directory if it doesn't exist
cache_dir = os.environ["HF_HOME"]
os.makedirs(cache_dir, exist_ok=True)

# Retrieve Hugging Face token from environment variable; fail fast at startup
# rather than failing later on the first model download.
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    raise RuntimeError("Hugging Face token is missing! Please set the HF_TOKEN environment variable.")

# Set the Hugging Face token in the environment variable so huggingface_hub
# picks it up implicitly for authenticated downloads.
os.environ["HUGGINGFACE_HUB_TOKEN"] = HF_TOKEN

# FastAPI application instance; routes are registered below via decorators.
app = FastAPI()

# Model names (Hugging Face Hub repository IDs).
MULTILINGUAL_MODEL_NAME = "Ehrii/sentiment"
# NOTE(review): the tokenizer is loaded from a different repo than the model —
# presumably the model was fine-tuned from it; confirm they remain compatible.
MULTILINGUAL_TOKENIZER_NAME = "tabularisai/multilingual-sentiment-analysis"
ENGLISH_MODEL_NAME = "siebert/sentiment-roberta-large-english"

# Load multilingual sentiment model (used for any non-English input).
# Loading happens at import time, so startup blocks until downloads finish.
try:
    multilingual_tokenizer = AutoTokenizer.from_pretrained(
        MULTILINGUAL_TOKENIZER_NAME,
        cache_dir=cache_dir
    )

    multilingual_model = pipeline(
        "sentiment-analysis",
        model=MULTILINGUAL_MODEL_NAME,
        tokenizer=multilingual_tokenizer
    )
except Exception as e:
    # Fail fast: the service cannot serve requests without its models.
    raise RuntimeError(f"Failed to load multilingual model: {e}")

# Load English sentiment model (used when langdetect reports "en").
try:
    english_model = pipeline(
        "sentiment-analysis",
        model=ENGLISH_MODEL_NAME
    )
except Exception as e:
    raise RuntimeError(f"Failed to load English sentiment model: {e}")

class SentimentRequest(BaseModel):
    """Request body for the /analyze/ endpoint."""

    text: str  # raw text to analyze; the endpoint strips surrounding whitespace

class SentimentResponse(BaseModel):
    """Response body for the /analyze/ endpoint."""

    original_text: str       # the stripped input text that was analyzed
    language_detected: str   # language code from langdetect, or "unknown" on failure
    sentiment: str           # lower-cased label predicted by the chosen model
    confidence_score: float  # model's score for the predicted label

def detect_language(text):
    """Return the language code langdetect infers for *text*.

    Falls back to the sentinel string "unknown" when detection raises
    (e.g. for empty or non-linguistic input).
    """
    try:
        detected = detect(text)
    except Exception:
        detected = "unknown"
    return detected

@app.get("/")
def home():
    """Health-check endpoint confirming the service is up."""
    status_message = "Sentiment Analysis API is running!"
    return {"message": status_message}

@app.post("/analyze/", response_model=SentimentResponse)
def analyze_sentiment(request: SentimentRequest):
    """Classify the sentiment of the submitted text.

    Detects the input language, routes English text to the dedicated English
    model and everything else (including "unknown") to the multilingual model,
    and returns the lower-cased label with its confidence score.

    Raises:
        HTTPException 400: if the text is empty after stripping whitespace.
        HTTPException 500: if model inference fails.
    """
    text = request.text.strip()
    if not text:
        raise HTTPException(status_code=400, detail="Text input cannot be empty.")

    language = detect_language(text)

    # Use English model if detected language is English; otherwise, use multilingual model
    model = english_model if language == "en" else multilingual_model

    try:
        # truncation=True guards against inputs longer than the model's maximum
        # sequence length, which would otherwise raise and surface as a raw 500.
        result = model(text, truncation=True)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Sentiment analysis failed: {e}")

    return SentimentResponse(
        original_text=text,
        language_detected=language,
        sentiment=result[0]["label"].lower(),
        confidence_score=result[0]["score"],
    )