# Hugging Face Space: multilingual sentiment-analysis API (status: Running, ~2.5 KB source)
import logging
import os

from fastapi import FastAPI
from langdetect import detect, DetectorFactory
from pydantic import BaseModel
from transformers import pipeline, AutoTokenizer
# Ensure consistent language detection results:
# langdetect is non-deterministic across runs unless the seed is fixed.
DetectorFactory.seed = 0

# Set Hugging Face cache directory to a writable location
# (app filesystems on hosted Spaces are often read-only outside /tmp).
os.environ["HF_HOME"] = "/tmp/huggingface"
os.makedirs(os.environ["HF_HOME"], exist_ok=True)

app = FastAPI()

# Load the original tokenizer from the base model rather than the fine-tuned
# repo (presumably the fine-tuned repo lacks tokenizer files — TODO confirm).
original_tokenizer = AutoTokenizer.from_pretrained("tabularisai/multilingual-sentiment-analysis")

# Access token for the fine-tuned model; None falls back to anonymous download.
hf_token = os.getenv("HF_TOKEN")

# Load the fine-tuned multilingual model and pass the tokenizer explicitly.
multilingual_model = pipeline(
    "sentiment-analysis",
    model="Ehrii/sentiment",
    tokenizer=original_tokenizer,
    token=hf_token
)

# English-only model remains unchanged; used when langdetect reports "en".
english_model = pipeline("sentiment-analysis", model="siebert/sentiment-roberta-large-english")
class SentimentRequest(BaseModel):
    """Request body for the /analyze/ endpoint."""

    # Raw input text whose sentiment should be analyzed; language is
    # auto-detected server-side.
    text: str
class SentimentResponse(BaseModel):
    """Response body returned by the /analyze/ endpoint."""

    # Echo of the input text.
    original_text: str
    # Language code from langdetect (e.g. "en"), or "unknown" on failure.
    language_detected: str
    # Normalized sentiment label (e.g. "positive", "negative", "neutral").
    sentiment: str
    # Model confidence score for the predicted label.
    confidence_score: float
def detect_language(text):
    """Best-effort language detection for *text*.

    Returns the language code reported by langdetect, or "unknown" when
    detection fails for any reason (e.g. empty or ambiguous input).
    """
    try:
        detected_code = detect(text)
    except Exception:
        return "unknown"
    return detected_code
@app.get("/")
def home():
    """Root health-check endpoint confirming the service is up."""
    status_message = "Sentiment Analysis API is running!"
    return {"message": status_message}
# Map raw model labels to human-readable sentiment names. Hoisted to module
# level so the dict is built once, not on every request.
# NOTE(review): LABEL_1 -> positive / LABEL_2 -> neutral is taken from the
# original code; several common 3-class models use LABEL_1 = neutral —
# confirm against the fine-tuned model's id2label config.
LABEL_MAP = {
    "LABEL_0": "negative",
    "LABEL_1": "positive",
    "LABEL_2": "neutral",
    "0": "negative",
    "1": "positive",
    "2": "neutral",
    "NEGATIVE": "negative",
    "POSITIVE": "positive",
    "NEUTRAL": "neutral",
}

logger = logging.getLogger(__name__)


@app.post("/analyze/", response_model=SentimentResponse)
def analyze_sentiment(request: SentimentRequest):
    """Detect the input's language, run the matching sentiment model, and
    return the normalized sentiment label with its confidence score."""
    text = request.text
    language = detect_language(text)

    # Route to the English-specific model when possible; everything else
    # (including "unknown") goes to the multilingual model.
    if language == "en":
        result = english_model(text)
    else:
        result = multilingual_model(text)

    # Pipelines return a list of {"label": ..., "score": ...} dicts.
    # Debug-level log replaces the previous production print().
    logger.debug("Model output: %s", result)

    # Normalize label casing, map generic labels to readable names, and
    # lowercase the result; unrecognized labels pass through lowercased.
    raw_label = result[0]["label"].upper()
    sentiment = LABEL_MAP.get(raw_label, raw_label).lower()

    return SentimentResponse(
        original_text=text,
        language_detected=language,
        sentiment=sentiment,
        confidence_score=result[0]["score"],
    )