# teste2 / app.py
# LuxyR's picture
# Update app.py
# 6481f49 verified
import gradio as gr
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
# ------------------------------
# Sentiment classification models
# ------------------------------
def _build_classifier(checkpoint):
    """Build a text-classification pipeline for ``checkpoint``.

    The model and tokenizer are both loaded from the same checkpoint name,
    and the pipeline is placed on device 0 when CUDA is available, else -1.
    """
    model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    device = 0 if torch.cuda.is_available() else -1
    return pipeline(
        "text-classification",
        model=model,
        tokenizer=tokenizer,
        device=device,
    )


# "Model A" in the referee prompt.
classifier_1 = _build_classifier("cardiffnlp/twitter-roberta-base-sentiment-latest")

# "Model B" in the referee prompt.
classifier_2 = _build_classifier("finiteautomata/bertweet-base-sentiment-analysis")
# ------------------------------
# Referee (text2text model)
# ------------------------------
# Device index handed to the pipeline: 0 when CUDA is available, otherwise -1.
_ARBITRO_DEVICE = 0 if torch.cuda.is_available() else -1

arbitro = pipeline(
    task="text2text-generation",
    model="google/flan-t5-large",
    device=_ARBITRO_DEVICE,
)
# ------------------------------
# Funções de Classificação
# ------------------------------
def classifier_1_predict(text: str) -> str:
    """Return the sentiment label that ``classifier_1`` assigns to ``text``.

    Args:
        text: Sentence to classify.

    Returns:
        The ``label`` field of the first prediction returned by the pipeline.
    """
    # Ask the tokenizer to truncate so that a very long input is clipped
    # rather than handed to the model at full length, where it can exceed
    # the model's maximum sequence length and raise.
    # NOTE(review): truncation=True relies on the tokenizer's configured
    # maximum length -- confirm this checkpoint's tokenizer defines one.
    predictions = classifier_1(text, truncation=True)
    return predictions[0]["label"]
def classifier_2_predict(text: str) -> str:
    """Return the sentiment label that ``classifier_2`` assigns to ``text``.

    Args:
        text: Sentence to classify.

    Returns:
        The ``label`` field of the first prediction returned by the pipeline.
    """
    # Ask the tokenizer to truncate so that a very long input is clipped
    # rather than handed to the model at full length, where it can exceed
    # the model's maximum sequence length and raise.
    # NOTE(review): truncation=True relies on the tokenizer's configured
    # maximum length -- confirm this checkpoint's tokenizer defines one.
    predictions = classifier_2(text, truncation=True)
    return predictions[0]["label"]
# ------------------------------
# Árbitro decide qual IA acertou e mostra os pesos
# ------------------------------
def judge_sentiment(text, result_1, result_2):
    """Ask the referee model to judge both predictions and report its verdict.

    Builds a single prompt containing the sentence, the two predicted labels,
    the judging criteria and the required answer format, sends it to
    ``arbitro`` and returns a report string.

    Args:
        text: The sentence that was classified.
        result_1: Label predicted by Model A.
        result_2: Label predicted by Model B.

    Returns:
        A string with both predictions on the first line, followed by the
        referee's stripped generated text.
    """
    # What is being judged: the sentence and each model's label.
    case_section = (
        f"Sentence: \"{text}\"\n"
        f"Model A prediction: {result_1} (uses labels: negative, neutral, positive)\n"
        f"Model B prediction: {result_2} (uses labels: neg, neu, pos)\n\n"
    )
    # What the referee is asked to do.
    task_section = (
        "Interpret the real sentiment expressed in the sentence.\n"
        "Judge whether each prediction is good or bad, and explain the parameters (weights) that influenced your decision.\n\n"
    )
    # The criteria the referee should weigh.
    criteria_section = (
        "Your judgment criteria (weights from 0 to 1):\n"
        "- Semantic match (meaning alignment with the sentence)\n"
        "- Tone match (emotional consistency)\n"
        "- Label accuracy (correct label among known sentiment labels)\n\n"
    )
    # The answer template the referee is told to follow.
    format_section = (
        "Respond ONLY in this format:\n"
        "Model A: good | Model B: bad\n"
        "Weights used:\n"
        "- Semantic match: 0.4\n"
        "- Tone match: 0.4\n"
        "- Label accuracy: 0.2\n"
        "Explanation: [your reasoning here]"
    )
    prompt = case_section + task_section + criteria_section + format_section

    generations = arbitro(prompt, max_new_tokens=150)
    verdict = generations[0]['generated_text'].strip()
    return f"Model A: {result_1} | Model B: {result_2}\n🤖 Árbitro:\n{verdict}"
# ------------------------------
# Pipeline principal
# ------------------------------
def process_input(text):
    """Classify ``text`` with both models and return the referee's report.

    Args:
        text: Sentence typed by the user. May be empty or ``None`` when the
            form is submitted without any content.

    Returns:
        The string produced by ``judge_sentiment`` for the two predictions,
        or a short prompt asking for input when ``text`` is empty, ``None``
        or whitespace only.
    """
    # There is nothing to classify in a blank submission, so skip the three
    # model calls and tell the user what is expected instead.
    if text is None or not text.strip():
        return "Por favor, digite uma frase para analisar."

    result_1 = classifier_1_predict(text)
    result_2 = classifier_2_predict(text)
    return judge_sentiment(text, result_1, result_2)
# Text shown under the title: explains the duel, the referee's criteria,
# and gives example sentences.
_DESCRIPTION = (
    "Compare duas IAs na tarefa de identificar o sentimento de uma frase. "
    "Uma terceira IA, chamada **árbitro**, decide qual resposta está mais correta e agora **explica os critérios (pesos) usados para julgar**.\n\n"
    "**Critérios de julgamento do árbitro:**\n"
    "- **Semantic match**: o quanto a resposta combina com o significado geral da frase.\n"
    "- **Tone match**: o quanto a resposta combina com o tom emocional.\n"
    "- **Label accuracy**: se o rótulo está entre os mais apropriados.\n\n"
    "⚠️ Melhores resultados com frases em inglês.\n\n"
    "💡 Exemplos:\n"
    "- 'I absolutely loved the movie!'\n"
    "- 'Not bad, but could be better.'"
)

# Single text box in, single text box out, wired to the full pipeline.
iface = gr.Interface(
    fn=process_input,
    inputs="text",
    outputs="text",
    title="AI Sentiment Duel: Classificador de Sentimentos",
    description=_DESCRIPTION,
)

# Start the app; share=True asks Gradio for a shareable link.
iface.launch(share=True)