# Hugging Face Space: multilingual sentiment-analysis API (status: Running, ~2.5 KB source)
import logging
import os

from fastapi import FastAPI
from langdetect import detect, DetectorFactory
from pydantic import BaseModel
from transformers import pipeline, AutoTokenizer
# Ensure consistent language detection results:
# langdetect is non-deterministic across runs unless the seed is fixed.
DetectorFactory.seed = 0

# Set Hugging Face cache directory to a writable location
# (app filesystems on hosted Spaces are often read-only outside /tmp).
os.environ["HF_HOME"] = "/tmp/huggingface"
os.makedirs(os.environ["HF_HOME"], exist_ok=True)

app = FastAPI()

# Load the original tokenizer from the base model rather than the fine-tuned
# repo (presumably the fine-tuned repo lacks tokenizer files — TODO confirm).
original_tokenizer = AutoTokenizer.from_pretrained("tabularisai/multilingual-sentiment-analysis")

# Access token for the fine-tuned model; None falls back to anonymous download.
hf_token = os.getenv("HF_TOKEN")

# Load the fine-tuned multilingual model and pass the tokenizer explicitly.
multilingual_model = pipeline(
    "sentiment-analysis",
    model="Ehrii/sentiment",
    tokenizer=original_tokenizer,
    token=hf_token
)

# English-only model remains unchanged; used when langdetect reports "en".
english_model = pipeline("sentiment-analysis", model="siebert/sentiment-roberta-large-english")
class SentimentRequest(BaseModel):
    """Request body for the /analyze/ endpoint."""

    # Raw input text whose sentiment should be analyzed; language is
    # auto-detected server-side.
    text: str
class SentimentResponse(BaseModel):
    """Response body returned by the /analyze/ endpoint."""

    # Echo of the input text.
    original_text: str
    # Language code from langdetect (e.g. "en"), or "unknown" on failure.
    language_detected: str
    # Normalized sentiment label (e.g. "positive", "negative", "neutral").
    sentiment: str
    # Model confidence score for the predicted label.
    confidence_score: float
def detect_language(text):
    """Best-effort language detection for *text*.

    Returns the language code reported by langdetect, or "unknown" when
    detection fails for any reason (e.g. empty or ambiguous input).
    """
    try:
        detected_code = detect(text)
    except Exception:
        return "unknown"
    return detected_code
@app.get("/")
def home():
    """Root health-check endpoint confirming the service is up."""
    status_message = "Sentiment Analysis API is running!"
    return {"message": status_message}
# Map raw model labels to human-readable sentiment names. Hoisted to module
# level so the dict is built once, not on every request.
# NOTE(review): LABEL_1 -> positive / LABEL_2 -> neutral is taken from the
# original code; several common 3-class models use LABEL_1 = neutral —
# confirm against the fine-tuned model's id2label config.
LABEL_MAP = {
    "LABEL_0": "negative",
    "LABEL_1": "positive",
    "LABEL_2": "neutral",
    "0": "negative",
    "1": "positive",
    "2": "neutral",
    "NEGATIVE": "negative",
    "POSITIVE": "positive",
    "NEUTRAL": "neutral",
}

logger = logging.getLogger(__name__)


@app.post("/analyze/", response_model=SentimentResponse)
def analyze_sentiment(request: SentimentRequest):
    """Detect the input's language, run the matching sentiment model, and
    return the normalized sentiment label with its confidence score."""
    text = request.text
    language = detect_language(text)

    # Route to the English-specific model when possible; everything else
    # (including "unknown") goes to the multilingual model.
    if language == "en":
        result = english_model(text)
    else:
        result = multilingual_model(text)

    # Pipelines return a list of {"label": ..., "score": ...} dicts.
    # Debug-level log replaces the previous production print().
    logger.debug("Model output: %s", result)

    # Normalize label casing, map generic labels to readable names, and
    # lowercase the result; unrecognized labels pass through lowercased.
    raw_label = result[0]["label"].upper()
    sentiment = LABEL_MAP.get(raw_label, raw_label).lower()

    return SentimentResponse(
        original_text=text,
        language_detected=language,
        sentiment=sentiment,
        confidence_score=result[0]["score"],
    )