|
import logging
import re

import gradio as gr
import requests
from bs4 import BeautifulSoup
from googlesearch import search
from transformers import pipeline
|
|
|
def get_google_description(keyword):
    """Return a web-derived description for ``keyword``, falling back to ``keyword``.

    Asks Google for a single English result and returns the first non-empty
    meta description obtained from the result page(s).

    Args:
        keyword: Search phrase. A blank or empty value is returned unchanged
            without issuing a search.

    Returns:
        The description text, or ``keyword`` itself when no description could
        be obtained (no results, no usable description, or a network failure).
    """
    # Nothing meaningful to search for; skip the network round-trip.
    if not keyword or not keyword.strip():
        return keyword

    try:
        # The try wraps the whole loop so that failures raised while the
        # results are being iterated are covered, not just the initial call.
        for url in search(keyword, num_results=1, lang='en'):
            description = get_description_from_url(url)
            if description:
                return description
    except requests.RequestException as exc:
        # NOTE(review): assumes googlesearch surfaces HTTP/network failures
        # (e.g. rate limiting) as requests exceptions -- confirm against the
        # installed version.
        logging.getLogger(__name__).warning(
            "Description lookup failed for %r: %s", keyword, exc
        )

    # Best-effort lookup: the caller still gets usable text to classify.
    return keyword
|
|
|
def get_description_from_url(url):
    """Fetch ``url`` and return the content of its ``<meta name="description">`` tag.

    Args:
        url: Address of the page to download (5 second timeout).

    Returns:
        The description with surrounding whitespace removed, or ``None`` when
        the page cannot be fetched, answers with an HTTP error status, has no
        description meta tag, or the tag's ``content`` is missing or blank.
    """
    try:
        response = requests.get(url, timeout=5)
        # Error pages (403/404/5xx) rarely describe the thing searched for.
        response.raise_for_status()
    except requests.RequestException as exc:
        logging.getLogger(__name__).warning("Could not fetch %s: %s", url, exc)
        return None

    soup = BeautifulSoup(response.text, 'html.parser')
    description_tag = soup.find('meta', {'name': 'description'})
    if description_tag is None:
        return None

    # The attribute may be absent (None) or whitespace-only; treat both as
    # "no description" so callers can rely on a simple truthiness check.
    content = (description_tag.get('content') or '').strip()
    return content or None
|
|
|
|
|
# Heading and credit line handed to the Gradio interface.
title = "Fold: Contextual Tag Recommendation System"
description = "powered by bart-large-mnli, made by @abhisheky127"

# Zero-shot classification pipeline, created once at import time and shared
# by every call to zero_shot().
classifier = pipeline(
    task="zero-shot-classification",
    model="facebook/bart-large-mnli",
)
|
|
|
|
|
|
|
|
|
|
|
|
|
def zero_shot(doc, candidates):
    """Score ``doc`` against a comma-separated list of candidate labels.

    Args:
        doc: Raw transaction text; it is passed through ``preprocess`` before
            classification.
        candidates: Comma-separated label names. Surrounding whitespace is
            stripped and blank entries (empty input, trailing or doubled
            commas) are ignored.

    Returns:
        A dict mapping each label to the score reported by the classifier, or
        an empty dict when ``candidates`` contains no usable label.
    """
    given_labels = [label.strip() for label in (candidates or "").split(",")]
    given_labels = [label for label in given_labels if label]
    if not given_labels:
        # Without labels there is nothing to rank; skip preprocessing (which
        # may hit the network) and the model call entirely.
        return {}

    doc = preprocess(doc)
    print(doc)  # show the text that is actually fed to the model

    result = classifier(doc, given_labels)
    return dict(zip(result['labels'], result['scores']))
|
|
|
def preprocess(text):
    """Clean a raw transaction narration and expand it into descriptive text.

    Cleaning steps: drop digits, replace every remaining non-letter character
    with a space, remove known noise tokens (channel codes, company suffixes,
    masked card numbers) and lower-case what is left. The cleaned text is then
    passed to ``get_google_description``.

    Args:
        text: Raw narration, e.g. ``"POS XXXXXXXXXXXX0001 APOLLO PHARMACY"``.

    Returns:
        The value returned by ``get_google_description`` for the cleaned text,
        or an empty string when nothing survives cleaning.
    """
    # Tokens that carry no information about the merchant (compared in upper case).
    noise_words = {
        "MPS", "POS", "BIL", "ONL", "BANGALORE", "PVT", "LTD", "INDIA", "LT",
        "XXXXXXXXXXXX",
    }

    # Digits are deleted outright (so "1MG" becomes "MG"), while other
    # non-letters become separators.
    letters_only = re.sub(r'[^a-zA-Z\s]', ' ', re.sub(r'\d', '', text))

    # split() discards all runs of whitespace, so no separate collapsing
    # step is needed before filtering the tokens.
    kept = [word for word in letters_only.upper().split() if word not in noise_words]
    cleaned_text = " ".join(kept).lower()

    if not cleaned_text:
        # Nothing left to look up; avoid an empty web search.
        return cleaned_text

    # Look up the *cleaned* text. The previous code passed an undefined name
    # here, which raised NameError and discarded all of the cleaning above.
    return get_google_description(cleaned_text)
|
|
|
|
|
|
|
|
|
# Interface widgets: two text boxes (transaction text and comma-separated
# labels) feeding zero_shot(), and a label component for its result.
input1, input2 = gr.Textbox(label="Text"), gr.Textbox(label="Labels")
output = gr.Label(label="Output")
|
|
|
|
|
# Example rows for the interface: each entry is [transaction text, labels].
# Every example uses the same candidate label list.
transactions_and_tags = [
    [narration, "Medical, Food, Shopping, Subscription, Travel"]
    for narration in (
        "MPS/TRUFFLES/202303261700/034587/Bangalore",
        "MPS/TACO BELL/202304012247/108300/BANGALORE",
        "POS XXXXXXXXXXXX0001 APOLLO PHARMACY",
        "BIL/ONL/000471093694/1MG Techno/X7ZRUSVLURFQZO",
        "POS XXXXXXXXXXXX1111 DECATHLON SPORTS",
        "POS XXXXXXXXXXXX1111 IKEA INDIA PVT L",
        "POS XXXXXXXXXXXX1111 WWW AMAZON IN",
        "ME DC SI XXXXXXXXXXXX1111 SPOTIFY SI",
        "POS/NETFLIX/1140920002/100623/17:25",
        "POS XXXXXXXXXXXX1110 MAKEMYTRIP INDIA",
        "BIL/ONL/000691178015/IRCTC Serv/XZZBX91LTCY1AZ",
    )
]
|
|
|
|
|
# Wire the classifier function, its widgets and the example rows into a
# single Gradio interface.
gui = gr.Interface(
    fn=zero_shot,
    inputs=[input1, input2],
    outputs=[output],
    examples=transactions_and_tags,
    title=title,
    description=description,
)

# Start the app with Gradio's debug mode enabled.
gui.launch(debug=True)