"""Sentiment-analysis API.

Detects the language of incoming text and routes it to a dedicated English
model ("siebert/sentiment-roberta-large-english") or a fine-tuned
multilingual model ("johndoee/sentiment").
"""

import os

# Point the Hugging Face cache at a writable location BEFORE importing
# transformers: the library reads TRANSFORMERS_CACHE/HF_HOME at import time,
# so setting them after the import (as the original code did) has no effect.
os.environ["HF_HOME"] = "/tmp/huggingface"
os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface"
os.makedirs(os.environ["HF_HOME"], exist_ok=True)

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import pipeline, AutoTokenizer
from langdetect import detect, DetectorFactory
from huggingface_hub import hf_hub_download  # kept for compatibility; no longer called here

# Seed the detector so repeated calls on the same text give the same result
# (langdetect is otherwise non-deterministic on short/ambiguous input).
DetectorFactory.seed = 0

app = FastAPI()

# Tokenizer from the base multilingual model; reused by the fine-tuned pipeline.
original_tokenizer = AutoTokenizer.from_pretrained(
    "tabularisai/multilingual-sentiment-analysis"
)

# Load the fine-tuned multilingual model.
# NOTE: the original code additionally called hf_hub_download() for
# pytorch_model.bin, but the downloaded path was never used — pipeline()
# fetches the weights itself — so that redundant download has been removed.
try:
    multilingual_model = pipeline(
        "sentiment-analysis",
        model="johndoee/sentiment",
        tokenizer=original_tokenizer,
    )
except Exception as e:
    # Chain the original cause so startup failures are debuggable.
    raise RuntimeError(f"❌ Error loading model 'johndoee/sentiment': {e}") from e

# English-specific model (stronger on English than the multilingual one).
english_model = pipeline(
    "sentiment-analysis", model="siebert/sentiment-roberta-large-english"
)


class SentimentRequest(BaseModel):
    # Raw text to analyze.
    text: str


class SentimentResponse(BaseModel):
    original_text: str
    language_detected: str  # ISO 639-1 code from langdetect, or "unknown"
    sentiment: str  # model label, lowercased
    confidence_score: float  # model score in [0, 1]


def detect_language(text: str) -> str:
    """Best-effort language detection.

    Returns an ISO 639-1 code, or "unknown" when detection fails
    (langdetect raises on empty or feature-less input).
    """
    try:
        return detect(text)
    except Exception:
        return "unknown"


@app.get("/")
def home():
    """Liveness check."""
    return {"message": "Sentiment Analysis API is running!"}


@app.post("/analyze/", response_model=SentimentResponse)
def analyze_sentiment(request: SentimentRequest):
    """Detect the request language and run the matching sentiment model.

    Raises:
        HTTPException(400): when the request text is empty or whitespace-only
            (previously such input flowed straight into the model and failed
            opaquely; HTTPException was imported but never used).
    """
    text = request.text
    if not text or not text.strip():
        raise HTTPException(status_code=400, detail="Text must not be empty.")

    language = detect_language(text)

    # English gets the dedicated model; everything else — including the
    # "unknown" fallback — goes to the multilingual model.
    if language == "en":
        result = english_model(text)
    else:
        result = multilingual_model(text)

    return SentimentResponse(
        original_text=text,
        language_detected=language,
        sentiment=result[0]["label"].lower(),
        confidence_score=result[0]["score"],
    )