Spaces:
Running
Running
| # Built-in | |
| import os | |
| from pathlib import Path | |
| from dotenv import load_dotenv | |
| import uvicorn | |
| import re | |
| # Dependencies for FastAPI | |
| from fastapi import FastAPI | |
| from fastapi.responses import RedirectResponse | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from pydantic import BaseModel | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline | |
| from huggingface_hub import login | |
# Login
# Alternative for local runs (disabled): read the token from a .env file.
# dotenv_path = Path('../model_space/model_generation/.env')
# load_dotenv(dotenv_path=dotenv_path)
HF_TOKEN = os.getenv('HF_TOKEN')
# Only authenticate when a token is actually configured. Calling
# login(token=None) makes huggingface_hub fall back to an interactive prompt,
# which cannot be answered when this module is imported by a headless server.
if HF_TOKEN:
    login(token=HF_TOKEN)
# Request-body schema for the text sent by the client
class Paragraph(BaseModel):
    """Body model carrying the raw text that should be analysed."""

    # Raw text to classify. The name shadows the builtin `input`, but it is
    # the field name of this model and therefore has to stay as it is.
    input: str
# Build the EmoRoBERTa text-classification pipeline once, at import time.
# top_k=None asks the pipeline for a score per label instead of only the best one.
emotion = pipeline(
    task="text-classification",
    model="arpanghoshal/EmoRoBERTa",
    top_k=None,
)
# Start the app
app = FastAPI()

# Setup CORS policy
# Any origin may call the API. Credentials (cookies / Authorization headers)
# are deliberately NOT allowed: the FastAPI CORS documentation states that
# allow_origins, allow_methods and allow_headers cannot be ["*"] when
# allow_credentials is True, and combining them would let every website make
# credentialed requests.
# NOTE(review): the handlers visible in this file do not read cookies or auth
# headers; if a client really needs credentials, list its origin explicitly
# instead of re-enabling allow_credentials with the wildcard.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
def remove_unknown_symbols(text):
    """Return *text* with every unsupported character dropped.

    Kept characters are ASCII letters, digits, whitespace and the punctuation
    marks ``. , ? ! ' " -``; anything else is removed. The result is not
    truncated, whatever its length.
    """
    # Negated character class: matches exactly the characters to discard.
    disallowed = re.compile(r'[^A-Za-z0-9\s.,?!\'"-]')
    return disallowed.sub('', text)
| # APIs | |
async def docs():
    """Redirect the caller to the ``/docs`` URL."""
    # NOTE(review): no route decorator is visible on this handler in this
    # view of the file; confirm that it is registered on `app`.
    redirect = RedirectResponse(url="/docs")
    return redirect
async def predict_emotions_emoroberta(paragraph : Paragraph):
    """Aggregate EmoRoBERTa emotion scores over the sentences of a text.

    The input is split on ``.``, ``!``, ``?``, ``;`` and newlines. Each
    non-empty segment is cleaned with ``remove_unknown_symbols`` and classified
    on its own, and the per-label scores of all segments are summed.

    Args:
        paragraph: Request body; ``paragraph.input`` holds the text to analyse.

    Returns:
        ``{"predictions": [{"label": ..., "score": ...}, ...]}`` sorted by
        descending summed score. The list is empty when the input contains no
        classifiable text. Scores are sums, not averages, so they grow with
        the number of segments.
    """
    # NOTE(review): no route decorator is visible on this handler in this view
    # of the file; confirm that it is registered on `app`.
    # NOTE(review): model inference below is synchronous inside an async
    # handler; consider a plain `def` handler if request latency matters.

    # Summed score per label, in order of first appearance.
    total = {}

    for segment in re.split(r'[.!?;\n]', paragraph.input):
        segment = segment.strip()
        if not segment:
            continue

        cleaned_text = remove_unknown_symbols(segment)
        # A segment made only of unsupported characters (e.g. non-Latin text
        # or emoji) is empty after cleaning; classifying it would add
        # meaningless scores to the totals, so skip it.
        if not cleaned_text.strip():
            continue

        # Truncate over-long segments so a run-on sentence cannot exceed the
        # model's input size and fail the whole request.
        # NOTE(review): 512 is the usual RoBERTa token limit — confirm against
        # the model configuration.
        scores = emotion(cleaned_text, truncation=True, max_length=512)[0]

        for entry in scores:
            label = entry['label']
            total[label] = total.get(label, 0) + entry['score']

    # Convert the totals to the response format, highest score first.
    result = [{"label": label, "score": score} for label, score in total.items()]
    result.sort(key=lambda item: item['score'], reverse=True)
    return {"predictions": result}
| # if __name__ == "__main__": | |
| # uvicorn.run("api:app", host="0.0.0.0", port=8000, reload=True) |