"""Sentiment-analysis HTTP API.

English text is scored with an English-only model; every other language
(including text whose language cannot be detected) is scored with a
fine-tuned multilingual model.
"""

import os

# Point the Hugging Face cache at a writable location.
# This has to happen BEFORE `transformers` is imported: huggingface_hub reads
# HF_HOME once, when it is first imported, so assigning it afterwards leaves
# the default cache directory in use.
os.environ["HF_HOME"] = "/tmp/huggingface"
os.makedirs(os.environ["HF_HOME"], exist_ok=True)

from fastapi import FastAPI  # noqa: E402
from langdetect import DetectorFactory, detect  # noqa: E402
from pydantic import BaseModel  # noqa: E402
from transformers import AutoTokenizer, pipeline  # noqa: E402

# Ensure consistent language detection results across calls.
DetectorFactory.seed = 0

app = FastAPI()

# Load the original tokenizer from the base model.
original_tokenizer = AutoTokenizer.from_pretrained(
    "tabularisai/multilingual-sentiment-analysis"
)

# Load the fine-tuned model and pass the tokenizer explicitly.
multilingual_model = pipeline(
    "sentiment-analysis",
    model="Ehrii/sentiment",
    tokenizer=original_tokenizer,
)

# English model remains unchanged.
english_model = pipeline(
    "sentiment-analysis",
    model="siebert/sentiment-roberta-large-english",
)


class SentimentRequest(BaseModel):
    """Request body for ``POST /analyze/``."""

    # Raw text to classify.
    text: str


class SentimentResponse(BaseModel):
    """Response body for ``POST /analyze/``."""

    # The text exactly as received in the request.
    original_text: str
    # Value returned by ``detect_language`` (``"unknown"`` on failure).
    language_detected: str
    # Lower-cased label reported by the selected model.
    sentiment: str
    # Score reported by the selected model for that label.
    confidence_score: float


def detect_language(text: str) -> str:
    """Return the language code detected for *text*, or ``"unknown"``.

    Detection is best-effort: any failure inside ``langdetect`` (for example
    on empty or symbol-only input) is mapped to ``"unknown"`` so the caller
    can still fall back to the multilingual model instead of failing.
    """
    try:
        return detect(text)
    except Exception:  # deliberate catch-all: detection must never fail a request
        return "unknown"


@app.get("/")
def home():
    """Health-check endpoint confirming the service is up."""
    return {"message": "Sentiment Analysis API is running!"}


@app.post("/analyze/", response_model=SentimentResponse)
def analyze_sentiment(request: SentimentRequest) -> SentimentResponse:
    """Classify the sentiment of ``request.text``.

    The text's language is detected first; ``"en"`` is routed to the English
    model and anything else (including ``"unknown"``) to the multilingual one.

    Returns:
        A ``SentimentResponse`` carrying the original text, the detected
        language, the lower-cased model label and its score.
    """
    text = request.text
    language = detect_language(text)

    # Choose the appropriate model based on language.
    model = english_model if language == "en" else multilingual_model

    # Truncate to the model's maximum sequence length; without this, overly
    # long inputs raise inside the model and surface as a 500 error.
    top_prediction = model(text, truncation=True)[0]

    return SentimentResponse(
        original_text=text,
        language_detected=language,
        sentiment=top_prediction["label"].lower(),
        confidence_score=float(top_prediction["score"]),
    )