import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from huggingface_hub import login
import os

# Load a tokenizer and sequence-classification model from the Hub
def load_model(model_name):
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    return tokenizer, model
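# Note: from_pretrained downloads the weights on first use and caches them
# locally, so repeated loads of the same repo are fast.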

# Models to compare
original_model_name = "Vishwas1/hummingbird-base-marathi"  # Replace with your original model
fine_tuned_model_name = "Vishwas1/hummingbird-finetuned-marathi"  # Replace with your fine-tuned model's repo ID

# Read the Hugging Face token from the environment (set as a Space secret)
hf_token = os.getenv('HUGGINGFACE_TOKEN')
if not hf_token:
    raise ValueError("Hugging Face token not found. Please set it as a secret.")

# Login to Hugging Face Hub (required only if the model repos are private or gated)
login(hf_token)

# Load both models with the shared helper
original_tokenizer, original_model = load_model(original_model_name)
fine_tuned_tokenizer, fine_tuned_model = load_model(fine_tuned_model_name)

# Put both models in evaluation mode (disables dropout for deterministic inference)
original_model.eval()
fine_tuned_model.eval()

def predict(tokenizer, model, text):
    # Tokenize the input and run a forward pass without gradient tracking
    inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    # Softmax turns the logits into class probabilities; argmax picks the top class
    probs = torch.softmax(outputs.logits, dim=1)
    pred = torch.argmax(probs, dim=1).item()
    return pred, probs[0][pred].item()

def compare_models(text):
    pred_orig, confidence_orig = predict(original_tokenizer, original_model, text)
    pred_fine, confidence_fine = predict(fine_tuned_tokenizer, fine_tuned_model, text)

    # Map predictions to labels (adjust based on your model's labels)
    labels = {0: "Negative", 1: "Positive"}
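    # If the model config defines id2label (most Hub classifiers do), that
    # mapping could be used instead, e.g. labels = original_model.config.id2label
    # (assuming both models share the same label scheme).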

    result = {
        "Original Model Prediction": f"{labels[pred_orig]} ({confidence_orig:.2f})",
        "Fine-Tuned Model Prediction": f"{labels[pred_fine]} ({confidence_fine:.2f})"
    }
    return result
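# Example JSON produced for one input (illustrative values only):
# {"Original Model Prediction": "Positive (0.62)",
#  "Fine-Tuned Model Prediction": "Positive (0.91)"}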

# Gradio Interface
iface = gr.Interface(
    fn=compare_models,
    inputs=gr.Textbox(lines=5, placeholder="Enter text here...", label="Input Text"),
    outputs=gr.JSON(label="Model Predictions"),
    title="Compare Original and Fine-Tuned Models",
    description="Enter text to see predictions from the original and fine-tuned models."
)

iface.launch()
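# launch() serves the app on a local port, which is all a hosted Space needs;
# outside a Space (e.g., in a notebook), iface.launch(share=True) would create
# a temporary public link instead.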