|
import gradio as gr |
|
import torch |
|
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification |
|
|
|
|
|
|
|
|
|
# Model A: first sentiment classifier. The checkpoint id is named once so the
# model weights and the tokenizer are always loaded from the same repository.
_MODEL_A_ID = "cardiffnlp/twitter-roberta-base-sentiment-latest"

classifier_1 = pipeline(
    task="text-classification",
    model=AutoModelForSequenceClassification.from_pretrained(_MODEL_A_ID),
    tokenizer=AutoTokenizer.from_pretrained(_MODEL_A_ID),
    # Device index 0 when CUDA is available, otherwise -1 (CPU).
    device=0 if torch.cuda.is_available() else -1,
)
|
|
|
# Model B: second sentiment classifier. The checkpoint id is named once so the
# model weights and the tokenizer are always loaded from the same repository.
_MODEL_B_ID = "finiteautomata/bertweet-base-sentiment-analysis"

classifier_2 = pipeline(
    task="text-classification",
    model=AutoModelForSequenceClassification.from_pretrained(_MODEL_B_ID),
    tokenizer=AutoTokenizer.from_pretrained(_MODEL_B_ID),
    # Device index 0 when CUDA is available, otherwise -1 (CPU).
    device=0 if torch.cuda.is_available() else -1,
)
|
|
|
|
|
|
|
|
|
# Judge ("arbitro"): a text2text-generation pipeline that receives a prompt
# describing both classifier predictions and generates a free-text verdict.
arbitro = pipeline(
    task="text2text-generation",
    model="google/flan-t5-large",
    # Device index 0 when CUDA is available, otherwise -1 (CPU).
    device=0 if torch.cuda.is_available() else -1,
)
|
|
|
|
|
|
|
|
|
def classifier_1_predict(text):
    """Return Model A's top sentiment label for *text*.

    Args:
        text: Sentence to classify.

    Returns:
        The ``label`` field of the first prediction produced by
        ``classifier_1``.
    """
    # The text comes straight from a free-form UI box, so it can be arbitrarily
    # long. Without truncation, input beyond the encoder's position limit makes
    # the pipeline raise. RoBERTa-base checkpoints accept at most 512 tokens;
    # the limit is passed explicitly rather than relying on the tokenizer
    # config carrying a usable ``model_max_length``.
    predictions = classifier_1(text, truncation=True, max_length=512)
    return predictions[0]["label"]
|
|
|
def classifier_2_predict(text):
    """Return Model B's top sentiment label for *text*.

    Args:
        text: Sentence to classify.

    Returns:
        The ``label`` field of the first prediction produced by
        ``classifier_2``.
    """
    # The text comes straight from a free-form UI box, so it can be arbitrarily
    # long. BERTweet-base was trained with a 128-token maximum sequence length;
    # longer input overflows its position embeddings and raises, so truncate
    # explicitly at call time.
    predictions = classifier_2(text, truncation=True, max_length=128)
    return predictions[0]["label"]
|
|
|
|
|
|
|
|
|
def judge_sentiment(text, result_1, result_2):
    """Ask the judge model to assess both classifier predictions for *text*.

    Args:
        text: The sentence that was classified.
        result_1: Label predicted by Model A.
        result_2: Label predicted by Model B.

    Returns:
        A string with both labels on the first line, followed by an
        "Árbitro" header and the judge model's stripped generated text.
    """
    # One entry per prompt line; empty entries become the blank separator
    # lines once everything is joined with "\n".
    prompt_lines = [
        f"Sentence: \"{text}\"",
        f"Model A prediction: {result_1} (uses labels: negative, neutral, positive)",
        f"Model B prediction: {result_2} (uses labels: neg, neu, pos)",
        "",
        "Interpret the real sentiment expressed in the sentence.",
        "Judge whether each prediction is good or bad, and explain the parameters (weights) that influenced your decision.",
        "",
        "Your judgment criteria (weights from 0 to 1):",
        "- Semantic match (meaning alignment with the sentence)",
        "- Tone match (emotional consistency)",
        "- Label accuracy (correct label among known sentiment labels)",
        "",
        "Respond ONLY in this format:",
        "Model A: good | Model B: bad",
        "Weights used:",
        "- Semantic match: 0.4",
        "- Tone match: 0.4",
        "- Label accuracy: 0.2",
        "Explanation: [your reasoning here]",
    ]
    prompt = "\n".join(prompt_lines)

    generations = arbitro(prompt, max_new_tokens=150)
    verdict = generations[0]["generated_text"].strip()

    header = f"Model A: {result_1} | Model B: {result_2}"
    return f"{header}\n🤖 Árbitro:\n{verdict}"
|
|
|
|
|
|
|
|
|
def process_input(text):
    """Classify *text* with both models and return the judge's verdict.

    Args:
        text: Sentence typed into the interface's text box.

    Returns:
        The string built by ``judge_sentiment`` (both labels plus the judge's
        commentary), or a short request for input when *text* is ``None``,
        empty, or only whitespace.
    """
    # Guard: with nothing to analyse, running three models would only produce
    # a meaningless verdict, so answer immediately instead.
    if text is None or not text.strip():
        return "Por favor, digite uma frase para analisar."

    result_1 = classifier_1_predict(text)
    result_2 = classifier_2_predict(text)
    return judge_sentiment(text, result_1, result_2)
|
|
|
# User-facing description shown under the title (Markdown, in Portuguese).
_IFACE_DESCRIPTION = (
    "Compare duas IAs na tarefa de identificar o sentimento de uma frase. "
    "Uma terceira IA, chamada **árbitro**, decide qual resposta está mais correta e agora **explica os critérios (pesos) usados para julgar**.\n\n"
    "**Critérios de julgamento do árbitro:**\n"
    "- **Semantic match**: o quanto a resposta combina com o significado geral da frase.\n"
    "- **Tone match**: o quanto a resposta combina com o tom emocional.\n"
    "- **Label accuracy**: se o rótulo está entre os mais apropriados.\n\n"
    "⚠️ Melhores resultados com frases em inglês.\n\n"
    "💡 Exemplos:\n"
    "- 'I absolutely loved the movie!'\n"
    "- 'Not bad, but could be better.'"
)

# Single text box in, single text box out; each submission is handled by
# process_input.
iface = gr.Interface(
    fn=process_input,
    inputs="text",
    outputs="text",
    title="AI Sentiment Duel: Classificador de Sentimentos",
    description=_IFACE_DESCRIPTION,
)

# Start the web app; share=True asks Gradio for a shareable link.
iface.launch(share=True)
|
|