# Load model directly
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForSequenceClassification, TextClassificationPipeline
import torch
import gradio as gr
from openpyxl import load_workbook
from numpy import mean
# Summarization model
tokenizer = AutoTokenizer.from_pretrained("suriya7/bart-finetuned-text-summarization")
model = AutoModelForSeq2SeqLM.from_pretrained("suriya7/bart-finetuned-text-summarization")

# Keyword-extraction model
tokenizer_keywords = AutoTokenizer.from_pretrained("transformer3/H2-keywordextractor")
model_keywords = AutoModelForSeq2SeqLM.from_pretrained("transformer3/H2-keywordextractor")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Load the fine-tuned model and tokenizer
new_model = AutoModelForSequenceClassification.from_pretrained('roberta-rating')
new_tokenizer = AutoTokenizer.from_pretrained('roberta-rating')
# Create a classification pipeline
classifier = TextClassificationPipeline(model=new_model, tokenizer=new_tokenizer, device=device)
# Map the classifier's numeric labels to display ratings (e.g. 3 -> "3/5")
label_mapping = {1: '1/5', 2: '2/5', 3: '3/5', 4: '4/5', 5: '5/5'}
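
# The pipeline output looks like [{'label': 'LABEL_4', 'score': 0.91}] (score
# illustrative); evaluate() below parses the integer after the underscore to
# recover the numeric rating.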
def parse_xl(file_path):
    """Collect every non-empty cell value from all sheets of an Excel workbook."""
    cells = []
    workbook = load_workbook(filename=file_path)
    for sheet in workbook.worksheets:
        for row in sheet.iter_rows():
            for cell in row:
                if cell.value is not None:
                    cells.append(cell.value)
    return cells
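
# Example (hypothetical workbook): parse_xl("reviews.xlsx") might return
# ["Great battery life", "Too expensive", ...], one entry per non-empty cell.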
def evaluate(file):
    reviews = parse_xl(file)
    ratings = []
    text = ""
    for review in reviews:
        review = str(review)  # cells may hold non-string values (numbers, dates)
        ratings.append(int(classifier(review)[0]['label'].split('_')[1]))
        text += review
        text += " "

    # Summarize the concatenated reviews
    inputs = tokenizer([text], max_length=1024, truncation=True, return_tensors="pt")
    summary_ids = model.generate(inputs["input_ids"], num_beams=2, min_length=50, max_length=1000)
    summary = tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]

    # Extract keywords from the same concatenated text
    inputs_keywords = tokenizer_keywords([text], max_length=1024, truncation=True, return_tensors="pt")
    summary_ids_keywords = model_keywords.generate(inputs_keywords["input_ids"], num_beams=2, min_length=0, max_length=100)
    keywords = tokenizer_keywords.batch_decode(summary_ids_keywords, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]

    return round(mean(ratings), 2), summary, keywords
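
# Quick local sanity check without the UI (assumes a "reviews.xlsx" file exists):
# rating, summary, keywords = evaluate("reviews.xlsx")
# print(rating, summary, keywords)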
iface = gr.Interface(
    fn=evaluate,
    inputs=gr.File(label="Reviews", file_types=[".xlsx", ".xlsm", ".xltx", ".xltm"]),
    outputs=[gr.Textbox(label="Rating"), gr.Textbox(label="Summary"), gr.Textbox(label="Keywords")],
    title='Summarize Reviews',
    description="Evaluate and summarize a collection of reviews. Reviews are submitted as an Excel file, where each review is in its own cell."
)

iface.launch(share=True)
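
# Note: share=True asks Gradio to create a temporary public link in addition to
# the local URL; set it to False to keep the app reachable only locally.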