import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from huggingface_hub import login
import os

# Load a causal-LM model and its tokenizer from the Hub
def load_model(model_name):
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)
    return tokenizer, model
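# Note (assumption: the repos below may be gated or private): from_pretrained
# also accepts token=hf_token directly, though the login() call further down
# already authenticates the whole process.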

# Hub repo ids of the original and fine-tuned models to compare
original_model_name = "Vishwas1/hummingbird-base-marathi-finetuned"
fine_tuned_model_name = "Vishwas1/hummingbird-base-marathi-finetuned-finetuned"

# Load Hugging Face token
hf_token = os.getenv('HF_API_TOKEN')
if not hf_token:
    raise ValueError("Hugging Face token not found. Please set the HF_API_TOKEN environment variable.")

# Login to Hugging Face Hub
login(token=hf_token)
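# Example shell setup before launching the app (placeholder token value):
#   export HF_API_TOKEN=hf_xxxxxxxxxxxx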

# Load the original and fine-tuned models
original_tokenizer, original_model = load_model(original_model_name)
fine_tuned_tokenizer, fine_tuned_model = load_model(fine_tuned_model_name)

# Ensure models are in evaluation mode
original_model.eval()
fine_tuned_model.eval()
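# Optional sketch (assumption: a CUDA GPU is available) — move both models to
# the GPU for faster generation; the tokenized inputs in generate_text below
# would then need a matching .to(device) call:
#   device = "cuda" if torch.cuda.is_available() else "cpu"
#   original_model.to(device)
#   fine_tuned_model.to(device)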

# Generate a continuation (up to 100 tokens total) for a prompt with a given tokenizer/model pair
def generate_text(tokenizer, model, prompt):
    inputs = tokenizer(prompt, return_tensors="pt")
    with torch.no_grad():
        # Unpacking inputs passes the attention mask along with input_ids
        generated_ids = model.generate(**inputs, max_length=100)
    return tokenizer.decode(generated_ids[0], skip_special_tokens=True)

# Compare text generated by the original and fine-tuned models for the same prompt
def compare_models(prompt):
    # Return both outputs side by side for comparison
    result = {
        "Original Model Output": generate_text(original_tokenizer, original_model, prompt),
        "Fine-Tuned Model Output": generate_text(fine_tuned_tokenizer, fine_tuned_model, prompt)
    }
    return result
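# Quick sanity check outside the Gradio UI (hypothetical prompt):
#   print(compare_models("माझे नाव"))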

# Gradio Interface
iface = gr.Interface(
    fn=compare_models,
    inputs=gr.Textbox(lines=5, placeholder="Enter text here...", label="Input Text"),
    outputs=gr.JSON(label="Generated Texts"),
    title="Compare Text Generation from Original and Fine-Tuned Models",
    description="Enter a prompt to generate text from the original and fine-tuned models."
)

if __name__ == "__main__":
    iface.launch()