import functools
import pickle as pkl
import re
import string

import nltk
import numpy as np
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from textblob import Word
# Fetch the NLTK data packages at import time. 'stopwords' backs the
# stopwords.words('english') lookup in process_row; 'wordnet' is presumably
# what the TextBlob lemmatizer relies on -- confirm before removing.
nltk.download('stopwords')
nltk.download('wordnet')
# Patterns are raw strings: '\S' / '\s' in a plain literal are invalid escape
# sequences and trigger a SyntaxWarning on recent Python versions.
_EMAIL_WITH_COM_RE = re.compile(r'(\S+@\S+)(com|\s+com)')
_EMAIL_RE = re.compile(r'(\S+@\S+)')
_WHITESPACE_RUN_RE = re.compile(r'\s{1,}')

# Every character listed here is deleted from the text outright.
# NOTE(review): 'ββ,ββ-β' looks like mis-encoded text (possibly typographic
# quotes/dashes). It is kept byte-for-byte so the cleaning output does not
# change -- confirm the intended characters against the training code.
_STRIPPED_CHARS = string.punctuation + '\n' + 'ββ,ββ-β' + '0123456789'
_STRIP_TABLE = str.maketrans('', '', _STRIPPED_CHARS)


@functools.lru_cache(maxsize=None)
def _english_stopwords():
    """Return NLTK's English stop words as a frozenset, loaded only once."""
    return frozenset(stopwords.words('english'))


def process_row(row):
    """Normalise one raw document into a cleaned, space-separated token string.

    Steps, in order:
      1. replace e-mail-like tokens (anything containing ``@``) with a space;
      2. delete punctuation, newlines, digits and the extra characters in
         ``_STRIPPED_CHARS``;
      3. lower-case the text;
      4. drop English stop words;
      5. lemmatize each token with TextBlob's ``Word.lemmatize``;
      6. stem each token with NLTK's ``PorterStemmer``;
      7. collapse whitespace runs and drop tokens of one or two characters.

    The order is kept exactly as-is: ``predict_class`` feeds this output to a
    pickled vectorizer, which presumably was fitted on text cleaned the same
    way -- verify before changing any step.

    Args:
        row: The raw document text (``str``).

    Returns:
        The cleaned text as a single string of tokens joined by one space;
        an empty string when nothing survives the filters.
    """
    row = _EMAIL_WITH_COM_RE.sub(' ', row)
    row = _EMAIL_RE.sub(' ', row)

    # One C-level pass; equivalent to filtering the text character by character.
    row = row.translate(_STRIP_TABLE)
    row = row.lower()

    stop = _english_stopwords()
    row = ' '.join(word for word in row.split() if word not in stop)

    row = ' '.join(Word(word).lemmatize() for word in row.split())

    stemmer = PorterStemmer()
    row = ' '.join(stemmer.stem(word) for word in row.split())

    row = _WHITESPACE_RUN_RE.sub(' ', row)
    return ' '.join(word for word in row.split() if len(word) > 2)
@functools.lru_cache(maxsize=None)
def _load_pickle(path):
    """Unpickle the object stored at ``path``, caching the result per path.

    The file handle is closed as soon as loading finishes. Because the result
    is cached, a file replaced on disk is not picked up until the process
    restarts.
    """
    # NOTE: unpickling can execute arbitrary code -- only load files that
    # come from a trusted source.
    with open(path, "rb") as handle:
        return pkl.load(handle)


def predict_class(doc):
    """Predict the class label of a single document.

    The text is cleaned with ``process_row``, vectorised with the pickled
    vectorizer from ``tfidf_vectorizer.pk`` and classified by the pickled
    model from ``logistic_model.pk``. Both files are resolved relative to the
    current working directory.

    Args:
        doc: The raw document text.

    Returns:
        The first (and only) element of ``model.predict`` for this document.
        Its exact type and label set depend on the pickled model.

    Raises:
        FileNotFoundError: If either pickle file is missing.
    """
    model = _load_pickle("logistic_model.pk")
    vectorizer = _load_pickle("tfidf_vectorizer.pk")

    clean_doc = process_row(doc)
    # Densify the single-row transform result into a plain ndarray.
    vector = np.array(vectorizer.transform([clean_doc]).todense())
    class_pred = model.predict(vector)

    return class_pred[0]
import gradio as gr

# Web UI: one text box in, one text box out, wired to predict_class.
iface = gr.Interface(
    fn=predict_class,
    inputs=gr.Textbox(type="text", label="Enter Your Document"),
    outputs=[
        gr.Textbox(type="text", value=". . . ", label="Predicted Class"),
    ],
    title="News Class Predictor",
    description="Predicts whether the News belongs to 'Politics'class or 'Sports' class",
)

# Starts the app as soon as this module is executed (no __main__ guard in the
# original; kept so the launch behaviour is unchanged).
iface.launch(inline=False, debug=True)