import gradio as gr
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification

# ------------------------------
# Sentiment classification models
# ------------------------------
classifier_1 = pipeline(
    "text-classification",
    model=AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest"),
    tokenizer=AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest"),
    device=0 if torch.cuda.is_available() else -1
)

classifier_2 = pipeline(
    "text-classification",
    model=AutoModelForSequenceClassification.from_pretrained("finiteautomata/bertweet-base-sentiment-analysis"),
    tokenizer=AutoTokenizer.from_pretrained("finiteautomata/bertweet-base-sentiment-analysis"),
    device=0 if torch.cuda.is_available() else -1
)

# ------------------------------
# Judge (text2text model)
# ------------------------------
arbitro = pipeline(
    "text2text-generation",
    model="google/flan-t5-large",
    device=0 if torch.cuda.is_available() else -1
)

# ------------------------------
# Classification helpers
# ------------------------------
def classifier_1_predict(text):
    # cardiffnlp model returns one of: negative, neutral, positive
    result = classifier_1(text)[0]['label']
    return result

def classifier_2_predict(text):
    # bertweet model returns one of: NEG, NEU, POS
    result = classifier_2(text)[0]['label']
    return result

# ------------------------------
# The judge decides which model was right and reports the weights it used
# ------------------------------
def judge_sentiment(text, result_1, result_2):
    prompt = (
        f"Sentence: \"{text}\"\n"
        f"Model A prediction: {result_1} (uses labels: negative, neutral, positive)\n"
        f"Model B prediction: {result_2} (uses labels: neg, neu, pos)\n\n"
        "Interpret the real sentiment expressed in the sentence.\n"
        "Judge whether each prediction is good or bad, and explain the parameters (weights) that influenced your decision.\n\n"
        "Your judgment criteria (weights from 0 to 1):\n"
        "- Semantic match (meaning alignment with the sentence)\n"
        "- Tone match (emotional consistency)\n"
        "- Label accuracy (correct label among known sentiment labels)\n\n"
        "Respond ONLY in this format:\n"
        "Model A: good | Model B: bad\n"
        "Weights used:\n"
        "- Semantic match: 0.4\n"
        "- Tone match: 0.4\n"
        "- Label accuracy: 0.2\n"
        "Explanation: [your reasoning here]"
    )
    output = arbitro(prompt, max_new_tokens=150)[0]['generated_text'].strip()
    return f"Model A: {result_1} | Model B: {result_2}\n🤖 Judge:\n{output}"

# ------------------------------
# Main pipeline
# ------------------------------
def process_input(text):
    result_1 = classifier_1_predict(text)
    result_2 = classifier_2_predict(text)
    decision = judge_sentiment(text, result_1, result_2)
    return decision

iface = gr.Interface(
    fn=process_input,
    inputs="text",
    outputs="text",
    title="AI Sentiment Duel: Sentiment Classifier",
    description=(
        "Compare two AIs on the task of identifying the sentiment of a sentence. "
        "A third AI, the **judge**, decides which answer is more accurate and now **explains the criteria (weights) used to judge**.\n\n"
        "**The judge's criteria:**\n"
        "- **Semantic match**: how well the answer matches the overall meaning of the sentence.\n"
        "- **Tone match**: how well the answer matches the emotional tone.\n"
        "- **Label accuracy**: whether the label is among the most appropriate ones.\n\n"
        "⚠️ Best results with English sentences.\n\n"
        "💡 Examples:\n"
        "- 'I absolutely loved the movie!'\n"
        "- 'Not bad, but could be better.'"
    )
)

iface.launch(share=True)
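
# ------------------------------
# Quick sanity check (sketch): how process_input() could be called directly,
# without launching the Gradio UI. Kept commented out so the script's behavior
# is unchanged. The judge's text comes from flan-t5-large's generation, so the
# output shown below is illustrative only and not guaranteed to follow the
# requested format.
# ------------------------------
# print(process_input("I absolutely loved the movie!"))
# Illustrative shape of the result:
#   Model A: positive | Model B: POS
#   🤖 Judge:
#   Model A: good | Model B: good
#   Weights used: ...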