File size: 1,861 Bytes
fc91528 1111046 fc91528 1111046 fc91528 872e81a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
import pickle as pkl
import re
import numpy as np
import nltk
from nltk.stem import PorterStemmer
from textblob import Word
from nltk.corpus import stopwords
# Download the NLTK data this module relies on. 'stopwords' backs the
# stopwords.words('english') call in process_row; 'wordnet' is presumably
# what Word.lemmatize() needs -- TODO confirm.
for corpus_name in ('stopwords', 'wordnet'):
    nltk.download(corpus_name)
def process_row(row):
    """Clean one raw document and return it as a space-separated token string.

    The steps run in this order:

    1. Replace e-mail-like tokens (anything matching ``\\S+@\\S+``) with a space.
    2. Drop every ASCII punctuation character, newline, digit and the extra
       characters listed in the literal below.
    3. Lower-case the text.
    4. Remove NLTK English stop words.
    5. Lemmatize each token with TextBlob's ``Word.lemmatize()``.
    6. Stem each token with NLTK's ``PorterStemmer``.
    7. Keep only tokens longer than two characters.

    Args:
        row: The raw document text (a ``str``).

    Returns:
        The surviving tokens joined by single spaces; an empty string when
        nothing survives.

    NOTE(review): this presumably has to mirror the preprocessing used when
    the pickled vectorizer/model were fitted -- confirm before changing any
    step or its order.
    """
    from string import punctuation as ascii_punctuation

    # Raw strings: '\S' and '\s' inside a plain literal are invalid escape
    # sequences and trigger a warning on recent Python versions. The pattern
    # text itself is unchanged.
    row = re.sub(r'(\S+@\S+)(com|\s+com)', ' ', row)
    row = re.sub(r'(\S+@\S+)', ' ', row)

    # NOTE(review): 'ββ,ββ-β' looks like mis-encoded curly quotes / dashes.
    # It is kept exactly as found so the output does not change -- confirm
    # what characters were intended.
    dropped_chars = set(ascii_punctuation + '\n' + 'ββ,ββ-β' + '0123456789')
    row = ''.join(char for char in row if char not in dropped_chars)
    row = row.lower()

    # A set gives constant-time membership tests instead of scanning the
    # stop-word list once per token.
    stop_words = set(stopwords.words('english'))
    stemmer = PorterStemmer()

    # Lemmatizing and stemming work token by token, so the token list is
    # carried through directly instead of re-joining and re-splitting the
    # string between steps.
    tokens = [word for word in row.split() if word not in stop_words]
    tokens = [Word(word).lemmatize() for word in tokens]
    tokens = [stemmer.stem(word) for word in tokens]
    return " ".join(word for word in tokens if len(word) > 2)
# Unpickled artifacts keyed by file path, so each file is read from disk
# only once per process instead of on every prediction.
_ARTIFACT_CACHE = {}


def _load_pickle(path):
    """Return the object pickled at *path*, loading it on first use only."""
    if path not in _ARTIFACT_CACHE:
        # SECURITY: unpickling can execute arbitrary code, so these files
        # must be trusted artifacts shipped with the app, never user uploads.
        # The `with` block closes the file handle, which a bare open() inside
        # pkl.load() would leave to the garbage collector.
        with open(path, "rb") as handle:
            _ARTIFACT_CACHE[path] = pkl.load(handle)
    return _ARTIFACT_CACHE[path]


def predict_class(doc):
    """Predict the class of a single document.

    The text is cleaned with ``process_row``, transformed by the vectorizer
    stored in ``tfidf_vectorizer.pk`` and passed to the model stored in
    ``logistic_model.pk``. Both paths are relative to the current working
    directory.

    Args:
        doc: Raw document text.

    Returns:
        The first (and only) element of ``model.predict(...)`` for this
        document. Its exact type depends on the pickled model.

    Raises:
        FileNotFoundError: If either pickle file is missing.
    """
    model = _load_pickle("logistic_model.pk")
    vectorizer = _load_pickle("tfidf_vectorizer.pk")

    clean_doc = process_row(doc)
    # The vectorizer output is densified into a plain ndarray before
    # predicting; the single document becomes a one-row batch.
    vector = np.array(vectorizer.transform([clean_doc]).todense())
    class_pred = model.predict(vector)
    return class_pred[0]
# predict_class(" Barack Obama is seeking for a conference to be conducted in USA")
import gradio as gr

# Web UI: one input text box for the document and one output text box that
# shows whatever predict_class returns for it.
iface = gr.Interface(
    fn=predict_class,
    inputs=gr.Textbox(type="text", label="Enter Your Document"),
    outputs=[
        gr.Textbox(type="text", value=". . . ", label="Predicted Class"),
    ],
    title="News Class Predictor",
    description="Predicts whether the News belongs to 'Politics'class or 'Sports' class",
)

# Started at module level, as in the original script, so running the file
# serves the app.
iface.launch(inline=False, debug=True)