"""Sentiment-analysis HTTP API.

Exposes a FastAPI application with two routes:

* ``GET /``          -- liveness message.
* ``POST /analyze/`` -- detects the language of the submitted text and
  classifies its sentiment with an English-specific model when the text is
  detected as English, or with a multilingual model otherwise.

Both models are loaded once, at import time.
"""

import os

# Point the Hugging Face cache at a writable location.  This must run BEFORE
# `transformers` is imported: the Hugging Face libraries resolve their cache
# paths from these variables when they are first imported, so setting them
# afterwards may leave the default (possibly read-only) location in effect.
os.environ["HF_HOME"] = "/tmp/huggingface"
os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface"
os.makedirs(os.environ["HF_HOME"], exist_ok=True)

from fastapi import FastAPI  # noqa: E402
from langdetect import DetectorFactory, detect  # noqa: E402
from langdetect.lang_detect_exception import LangDetectException  # noqa: E402
from pydantic import BaseModel  # noqa: E402
from transformers import AutoTokenizer, pipeline  # noqa: E402

# Ensure consistent language detection results
DetectorFactory.seed = 0

app = FastAPI()

# Model names
multilingual_model_name = "johndoee/sentiment"
english_model_name = "siebert/sentiment-roberta-large-english"

# Load tokenizer and model for multilingual sentiment analysis
multilingual_tokenizer = AutoTokenizer.from_pretrained(multilingual_model_name)
multilingual_model = pipeline(
    "sentiment-analysis",
    model=multilingual_model_name,
    tokenizer=multilingual_tokenizer,
)

# Load English sentiment model
english_model = pipeline("sentiment-analysis", model=english_model_name)


class SentimentRequest(BaseModel):
    """Request body for ``POST /analyze/``."""

    # Text whose sentiment should be classified.
    text: str


class SentimentResponse(BaseModel):
    """Response body for ``POST /analyze/``."""

    # The text exactly as it was submitted.
    original_text: str
    # Language code returned by langdetect, or "unknown" if detection failed.
    language_detected: str
    # Lower-cased label of the top prediction from the selected model.
    sentiment: str
    # Score the selected model attached to that prediction.
    confidence_score: float


def detect_language(text: str) -> str:
    """Return the language code detected for *text*.

    Returns ``"unknown"`` when langdetect cannot identify a language (for
    example, empty text or text with no usable features).
    """
    try:
        return detect(text)
    except LangDetectException:
        # Detection is best-effort: callers fall back to the multilingual
        # model when the language cannot be determined.
        return "unknown"


@app.get("/")
def home():
    """Return a static message confirming the API is up."""
    return {"message": "Sentiment Analysis API is running!"}


@app.post("/analyze/", response_model=SentimentResponse)
def analyze_sentiment(request: SentimentRequest):
    """Classify the sentiment of ``request.text``.

    The English model is used when the text is detected as English (``"en"``);
    every other detection result, including ``"unknown"``, is routed to the
    multilingual model.

    Returns:
        SentimentResponse with the original text, the detected language, the
        lower-cased label of the top prediction and its score.
    """
    text = request.text
    language = detect_language(text)

    # Choose the appropriate model based on detected language
    model = english_model if language == "en" else multilingual_model

    # truncation=True clips inputs that exceed the model's maximum sequence
    # length; without it, long texts make the pipeline raise and the request
    # fails with a 500.
    result = model(text, truncation=True)
    top_prediction = result[0]

    return SentimentResponse(
        original_text=text,
        language_detected=language,
        sentiment=top_prediction["label"].lower(),
        confidence_score=top_prediction["score"],
    )