# Summarize, rate, and extract keywords from collections of reviews uploaded as Excel files
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForSequenceClassification, TextClassificationPipeline
import torch
import gradio as gr
from openpyxl import load_workbook
from numpy import mean

# Summarization model
tokenizer = AutoTokenizer.from_pretrained("suriya7/bart-finetuned-text-summarization")
model = AutoModelForSeq2SeqLM.from_pretrained("suriya7/bart-finetuned-text-summarization")

# Keyword-extraction model
tokenizer_keywords = AutoTokenizer.from_pretrained("transformer3/H2-keywordextractor")
model_keywords = AutoModelForSeq2SeqLM.from_pretrained("transformer3/H2-keywordextractor")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model_keywords.to(device)

# Load the fine-tuned rating model and tokenizer
new_model = AutoModelForSequenceClassification.from_pretrained('roberta-rating')
new_tokenizer = AutoTokenizer.from_pretrained('roberta-rating')

# Create a classification pipeline (the pipeline moves new_model to the device itself)
classifier = TextClassificationPipeline(model=new_model, tokenizer=new_tokenizer, device=device)

# Add label mapping for sentiment analysis
label_mapping = {1: '1/5', 2: '2/5', 3: '3/5', 4: '4/5', 5: '5/5'}
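# label_mapping gives the display form of the five rating classes; note that
# evaluate() below currently reports only the numeric mean rating, so this
# mapping is kept for reference.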

def parse_xl(file_path):
    """Collect every non-empty cell value from every sheet in the workbook."""
    cells = []

    workbook = load_workbook(filename=file_path)
    for sheet in workbook.worksheets:
        for row in sheet.iter_rows():
            for cell in row:
                if cell.value is not None:
                    cells.append(cell.value)

    return cells

def evaluate(file):
    reviews = parse_xl(file)
    ratings = []
    text = ""

    for review in reviews:
        review = str(review)  # cells may hold non-string values
        # Parse the star rating k out of the pipeline's "LABEL_k" output
        ratings.append(int(classifier(review)[0]['label'].split('_')[1]))
        text += review + " "

    # Summarize all reviews at once (input is truncated to 1024 tokens)
    inputs = tokenizer([text], max_length=1024, truncation=True, return_tensors="pt").to(device)
    summary_ids = model.generate(inputs["input_ids"], num_beams=2, min_length=50, max_length=1000)
    summary = tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]

    # Extract keywords from the same concatenated text
    inputs_keywords = tokenizer_keywords([text], max_length=1024, truncation=True, return_tensors="pt").to(device)
    summary_ids_keywords = model_keywords.generate(inputs_keywords["input_ids"], num_beams=2, min_length=0, max_length=100)
    keywords = tokenizer_keywords.batch_decode(summary_ids_keywords, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]

    return round(mean(ratings), 2), summary, keywords
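
# The two generate() calls above only ever see the first 1024 tokens of the
# concatenated reviews; longer submissions are silently truncated. A minimal
# sketch of a chunked alternative (hypothetical helper, not wired into the
# interface below):
def summarize_chunked(long_text, chunk_tokens=1024):
    """Summarize text longer than the model window by summarizing each chunk."""
    token_ids = tokenizer(long_text, return_tensors="pt")["input_ids"][0]
    partial_summaries = []
    for start in range(0, len(token_ids), chunk_tokens):
        chunk = token_ids[start:start + chunk_tokens].unsqueeze(0).to(device)
        ids = model.generate(chunk, num_beams=2, min_length=20, max_length=200)
        partial_summaries.append(tokenizer.batch_decode(ids, skip_special_tokens=True)[0])
    return " ".join(partial_summaries)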

iface = gr.Interface(
    fn=evaluate,
    inputs=gr.File(label="Reviews", file_types=[".xlsx", ".xlsm", ".xltx", ".xltm"]),
    outputs=[gr.Textbox(label="Rating"), gr.Textbox(label="Summary"), gr.Textbox(label="Keywords")],
    title='Summarize Reviews',
    description="Evaluate and summarize a collection of reviews. Reviews are submitted as an Excel file, where each review is in its own cell."
)

iface.launch(share=True)
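
# Example of exercising evaluate() without the UI (hypothetical local test;
# run before launch() or in a separate session, since launch() blocks):
#
#   from openpyxl import Workbook
#   wb = Workbook()
#   ws = wb.active
#   ws["A1"] = "Great product, works exactly as advertised."
#   ws["A2"] = "Stopped working after a week, very disappointed."
#   wb.save("reviews.xlsx")
#   print(evaluate("reviews.xlsx"))   # -> (mean rating, summary, keywords)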