# Ehrii's picture
# Update main.py
# f1c88da verified
import os

# Set Hugging Face cache directory to a writable location.
# This has to happen BEFORE `transformers` is imported: the Hugging Face
# libraries resolve their cache paths from HF_HOME when they are first
# imported, so assigning the variable afterwards leaves the default cache
# location in effect.
os.environ["HF_HOME"] = "/tmp/huggingface"
os.makedirs(os.environ["HF_HOME"], exist_ok=True)

from fastapi import FastAPI  # noqa: E402
from pydantic import BaseModel  # noqa: E402
from transformers import pipeline, AutoTokenizer  # noqa: E402
from langdetect import detect, DetectorFactory  # noqa: E402

# Ensure consistent language detection results
DetectorFactory.seed = 0

app = FastAPI()

# Load the original tokenizer from the base model
original_tokenizer = AutoTokenizer.from_pretrained("tabularisai/multilingual-sentiment-analysis")

# Optional access token, read from the environment (None when HF_TOKEN is unset)
hf_token = os.getenv("HF_TOKEN")

# Load the fine-tuned model and pass the tokenizer explicitly
multilingual_model = pipeline(
    "sentiment-analysis",
    model="Ehrii/sentiment",
    tokenizer=original_tokenizer,
    token=hf_token,
)

# English model remains unchanged
english_model = pipeline("sentiment-analysis", model="siebert/sentiment-roberta-large-english")
class SentimentRequest(BaseModel):
    # Request body for the /analyze/ endpoint.
    # text: the raw input string whose sentiment should be classified.
    text: str
class SentimentResponse(BaseModel):
    # Response body returned by the /analyze/ endpoint.

    # The input text, echoed back unchanged.
    original_text: str
    # Language code produced by detect_language ("unknown" when detection fails).
    language_detected: str
    # Lowercase sentiment label derived from the model's top prediction.
    sentiment: str
    # Score the model reported for that prediction.
    confidence_score: float
def detect_language(text):
    """Return the language code langdetect reports for ``text``.

    Detection is best-effort: if ``detect`` raises for any reason, the
    string ``"unknown"`` is returned instead of propagating the error.
    """
    try:
        code = detect(text)
    except Exception:
        # Fall back to a sentinel so callers never have to handle a failure.
        code = "unknown"
    return code
@app.get("/")
def home():
    """Return a fixed status message showing that the API is reachable."""
    return dict(message="Sentiment Analysis API is running!")
# Maps the label spellings a pipeline may emit (generic ``LABEL_n`` ids, bare
# class indices, or upper-cased names) onto lowercase sentiment names.
# Built once at import time instead of on every request.
_LABEL_MAP = {
    "LABEL_0": "negative",
    "LABEL_1": "positive",
    "LABEL_2": "neutral",
    "0": "negative",
    "1": "positive",
    "2": "neutral",
    "NEGATIVE": "negative",
    "POSITIVE": "positive",
    "NEUTRAL": "neutral",
}


@app.post("/analyze/", response_model=SentimentResponse)
def analyze_sentiment(request: SentimentRequest):
    """Classify the sentiment of ``request.text``.

    The text's language is detected first. English text is scored by
    ``english_model``; every other result, including the ``"unknown"``
    fallback from ``detect_language``, is scored by ``multilingual_model``.

    Returns:
        A ``SentimentResponse`` carrying the original text, the detected
        language, the lowercase sentiment label, and the score of the top
        prediction. Labels that are not in ``_LABEL_MAP`` are passed through
        lowercased, so the sentiment is not limited to positive/negative.
    """
    text = request.text
    language = detect_language(text)

    # Choose the appropriate model based on language
    model = english_model if language == "en" else multilingual_model

    # truncation=True clips inputs that exceed the model's maximum sequence
    # length; without it an over-long request fails inside the model.
    result = model(text, truncation=True)

    # Debugging: Print model output
    print("Model Output:", result)

    top_prediction = result[0]

    # Normalize to upper case for the lookup. str() keeps this safe if a
    # model config ever exposes a non-string label.
    raw_label = str(top_prediction["label"]).upper()
    sentiment = _LABEL_MAP.get(raw_label, raw_label).lower()

    return SentimentResponse(
        original_text=text,
        language_detected=language,
        sentiment=sentiment,
        confidence_score=top_prediction["score"],
    )