# Sentiment Analysis API — FastAPI service that routes text to an English
# or multilingual Hugging Face sentiment pipeline based on detected language.
import os

from fastapi import FastAPI, HTTPException
from langdetect import detect, DetectorFactory
from langdetect.lang_detect_exception import LangDetectException
from pydantic import BaseModel
from transformers import pipeline, AutoTokenizer
# Ensure consistent language detection results
# langdetect is non-deterministic by default; pinning the seed makes repeated
# calls on the same text return the same language code.
DetectorFactory.seed = 0
# Set Hugging Face cache directory
# NOTE(review): `transformers` is imported above, and some versions read
# HF_HOME / TRANSFORMERS_CACHE at import time — confirm these assignments
# actually take effect, or move them before the import.
os.environ["HF_HOME"] = "/tmp/huggingface_cache"
os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface_cache"
# Create cache directory if it doesn't exist
cache_dir = os.environ["HF_HOME"]
os.makedirs(cache_dir, exist_ok=True)
# Retrieve Hugging Face token from environment variable
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    # Fail fast at startup rather than at the first authenticated download.
    raise RuntimeError("Hugging Face token is missing! Please set the HF_TOKEN environment variable.")
# Set the Hugging Face token in the environment variable
# huggingface_hub picks up HUGGINGFACE_HUB_TOKEN for authenticated model pulls.
os.environ["HUGGINGFACE_HUB_TOKEN"] = HF_TOKEN
# FastAPI application instance used by the route decorators below.
app = FastAPI()
# Model names
# The multilingual model's weights and tokenizer live in different repos,
# hence two separate identifiers.
MULTILINGUAL_MODEL_NAME = "Ehrii/sentiment"
MULTILINGUAL_TOKENIZER_NAME = "tabularisai/multilingual-sentiment-analysis"
ENGLISH_MODEL_NAME = "siebert/sentiment-roberta-large-english"
# Load multilingual sentiment model.
# Weights and tokenizer come from different repos, so they are loaded
# separately and combined into one pipeline.
try:
    multilingual_tokenizer = AutoTokenizer.from_pretrained(
        MULTILINGUAL_TOKENIZER_NAME,
        cache_dir=cache_dir,
    )
    multilingual_model = pipeline(
        "sentiment-analysis",
        model=MULTILINGUAL_MODEL_NAME,
        tokenizer=multilingual_tokenizer,
    )
except Exception as e:
    # Chain the original exception (`from e`) so the underlying cause and
    # full traceback are preserved instead of being flattened into a string.
    raise RuntimeError(f"Failed to load multilingual model: {e}") from e
# Load English sentiment model.
# Single-repo model: the pipeline resolves its tokenizer automatically.
try:
    english_model = pipeline(
        "sentiment-analysis",
        model=ENGLISH_MODEL_NAME,
    )
except Exception as e:
    # Chain the original exception (`from e`) so the underlying cause and
    # full traceback are preserved instead of being flattened into a string.
    raise RuntimeError(f"Failed to load English sentiment model: {e}") from e
class SentimentRequest(BaseModel):
    # Raw text to run sentiment analysis on.
    text: str
class SentimentResponse(BaseModel):
    # Echo of the (whitespace-stripped) input text.
    original_text: str
    # Language code from langdetect (e.g. "en"), or "unknown" on failure.
    language_detected: str
    # Lower-cased label predicted by the model.
    sentiment: str
    # Model confidence for the predicted label.
    confidence_score: float
def detect_language(text: str) -> str:
    """Best-effort language detection via langdetect.

    Returns the detected language code (e.g. "en"), or "unknown" when
    langdetect cannot determine a language (empty or ambiguous input).
    """
    try:
        return detect(text)
    except LangDetectException:
        # Only swallow langdetect's own "can't detect" error; a broad
        # `except Exception` here would hide genuine bugs.
        return "unknown"
@app.get("/")
def home():
    """Health-check endpoint confirming the service is up."""
    status_payload = {"message": "Sentiment Analysis API is running!"}
    return status_payload
@app.post("/analyze/", response_model=SentimentResponse)
def analyze_sentiment(request: SentimentRequest):
    """Classify the sentiment of the submitted text.

    English text is routed to the dedicated English model; everything else
    goes to the multilingual model. Returns the predicted label and score.
    """
    cleaned_text = request.text.strip()
    if not cleaned_text:
        raise HTTPException(status_code=400, detail="Text input cannot be empty.")
    detected_lang = detect_language(cleaned_text)
    # English gets the specialized model; any other (or unknown) language
    # falls back to the multilingual one.
    if detected_lang == "en":
        classifier = english_model
    else:
        classifier = multilingual_model
    prediction = classifier(cleaned_text)[0]
    return SentimentResponse(
        original_text=cleaned_text,
        language_detected=detected_lang,
        sentiment=prediction["label"].lower(),
        confidence_score=prediction["score"],
    )